From f019578183375c4c753c0ba565f956b7de2601ae Mon Sep 17 00:00:00 2001
From: Simon Belak
Date: Mon, 30 Dec 2013 02:24:49 +0100
Subject: [PATCH] Selective auto-whitespace

---
Review notes (commentary only, placed after the three-dash marker so it is
not part of the commit message or of any hunk):

* Layout: this patch had lost all of its line breaks. Headers, diffstat and
  hunk lines are laid out one per line again; line boundaries were derived
  from the -/+ prefixes and from the line counts in each hunk header.
* Whitespace: indentation inside the hunks, and any whitespace carried by
  blank lines, was not recoverable and has been reconstructed by hand.
  Context and removed lines may therefore not match the target files
  byte-for-byte; a whitespace-tolerant apply (for example
  "git apply --ignore-whitespace") may be needed -- TODO confirm.
* Identical-looking pairs: the first lines of the modified-grammar and
  final-grammar bindings in combinators_source.clj, and the blank line in
  front of the auto-whitespace form in core.clj, appear both as removed and
  as added lines with the same text. Presumably they differed only in
  whitespace that is no longer visible.
* NOTE(review): the last core_test.clj hunk declares 13 new-side lines
  (7 additions, which also matches the diffstat total of 21), but 8 "+"
  markers are present. All 8 are kept below; the blank context lines of that
  hunk were placed by hand. Reconcile this hunk before applying.
* What the change does, as far as the hunks show:
  - auto-whitespace takes optional keyword arguments :only and :except.
    A rule gets the whitespace parser woven in when its name is in the
    :only set, or is not in the :except set, or when neither is given.
    :only is tested first, so it wins if both are supplied.
  - The start rule still gets the trailing whitespace parser appended,
    whether or not it was selected.
  - The :auto-whitespace option of parser may be either the whitespace
    parser itself or a vector [ws-parser :only|:except [rule-names]];
    ensure-vec normalises the former to a one-element vector, and the
    precondition requires the selector to be :only or :except and the
    selection to be a vector.

 src/instaparse/combinators_source.clj | 18 +++++++++++------
 src/instaparse/core.clj               | 29 +++++++++++++++++----------
 test/instaparse/core_test.clj         | 21 +++++++++++++++++++
 3 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/src/instaparse/combinators_source.clj b/src/instaparse/combinators_source.clj
index 458c97c..6cad90d 100644
--- a/src/instaparse/combinators_source.clj
+++ b/src/instaparse/combinators_source.clj
@@ -150,14 +150,20 @@
       (assoc (cat ws-parser (dissoc parser :red)) :red (:red parser))
       (cat ws-parser parser))))
 
-(defn auto-whitespace [grammar start grammar-ws start-ws]
+(defn auto-whitespace [grammar start grammar-ws start-ws & {:keys [only except]}]
   (let [ws-parser (hide (opt (nt start-ws)))
         grammar-ws (assoc grammar-ws start-ws (hide-tag (grammar-ws start-ws)))
-        modified-grammar (into {}
-                               (for [[nt parser] grammar]
-                                 [nt (auto-whitespace-parser parser ws-parser)]))
-        final-grammar (assoc modified-grammar start
-                             (assoc (cat (dissoc (modified-grammar start) :red)
+        add-ws? (cond
+                  only (set only)
+                  except (complement (set except))
+                  :else (constantly true))
+        modified-grammar (into {}
+                               (for [[nt parser] grammar]
+                                 [nt (if (add-ws? nt)
+                                       (auto-whitespace-parser parser ws-parser)
+                                       parser)]))
+        final-grammar (assoc modified-grammar start
+                             (assoc (cat (dissoc (modified-grammar start) :red)
                                          ws-parser)
                                     :red (:red (modified-grammar start))))]
     (merge final-grammar grammar-ws)))
diff --git a/src/instaparse/core.clj b/src/instaparse/core.clj
index 37fa436..d841adf 100644
--- a/src/instaparse/core.clj
+++ b/src/instaparse/core.clj
@@ -126,6 +126,9 @@
   (binding [*out* writer]
     (println (print/Parser->str x))))
 
+(defn- ensure-vec [x]
+  (if (vector? x) x [x]))
+
 (defn parser
   "Takes a string specification of a context-free grammar,
   or a URI for a text file containing such a specification,
@@ -144,12 +147,15 @@
   [grammar-specification &{:as options}]
   {:pre [(contains? #{:abnf :ebnf nil} (get options :input-format))
          (contains? #{:enlive :hiccup nil} (get options :output-format))
-         (let [ws-parser (get options :auto-whitespace)]
+         (let [[ws-parser selector selected] (ensure-vec (get options :auto-whitespace))]
            (or (nil? ws-parser)
                (and
-                 (map? ws-parser)
-                 (contains? ws-parser :grammar)
-                 (contains? ws-parser :start-production))))]}
+                 (map? ws-parser)
+                 (contains? ws-parser :grammar)
+                 (contains? ws-parser :start-production)
+                 (or (nil? selector)
+                     (and (contains? #{:only :except} selector)
+                          (vector? selected))))))]}
   (let [input-format (get options :input-format *default-input-format*)
         build-parser (case input-format
                        :abnf abnf/build-parser
@@ -187,13 +193,14 @@
           (let [spec (slurp grammar-specification)
                 parser (build-parser spec output-format)]
             (map->Parser parser)))]
-
-    (if-let [{ws-grammar :grammar ws-start :start-production}
-             (get options :auto-whitespace)]
-      (assoc built-parser :grammar
-             (c/auto-whitespace (:grammar built-parser) (:start-production built-parser)
-                                ws-grammar ws-start))
-      built-parser)))
+
+    (let [[{ws-grammar :grammar ws-start :start-production} selector selected]
+          (ensure-vec (get options :auto-whitespace))]
+      (if ws-grammar
+        (assoc built-parser :grammar
+               (c/auto-whitespace (:grammar built-parser) (:start-production built-parser)
+                                  ws-grammar ws-start selector selected))
+        built-parser))))
 
 (defn failure?
   "Tests whether a parse result is a failure."
diff --git a/test/instaparse/core_test.clj b/test/instaparse/core_test.clj
index 3a12012..3a1c5ad 100644
--- a/test/instaparse/core_test.clj
+++ b/test/instaparse/core_test.clj
@@ -305,6 +305,20 @@
      number = #'[0-9]+'"
     :auto-whitespace whitespace-or-comments))
 
+(def auto-whitespace-select-only
+  (insta/parser
+    "S = not-B B
+     not-B = (!B #'.')+
+     B = 'b'+"
+    :auto-whitespace [whitespace :only [:B]]))
+
+(def auto-whitespace-select-except
+  (insta/parser
+    "S = not-B B
+     not-B = (!B #'.')+
+     B = 'b'+"
+    :auto-whitespace [whitespace :except [:not-B]]))
+
 (def eat-a (insta/parser "Aeater = #'[a]'+" :output-format :enlive))
 
 (def int-or-double
@@ -597,6 +611,13 @@
     (words-and-numbers-auto-whitespace-and-comments " abc 123 (* 456 *) (* (* 7*) 89 *) def ")
     [:sentence [:word "abc"] [:number "123"] [:word "def"]]
 
+
+    (auto-whitespace-select-only "aaa 123 b b b")
+    [:S [:not-B "a" "a" "a" " " "1" "2" "3"] [:B "b" "b" "b"]]
+
+    (auto-whitespace-select-except "aaa 123 b b b")
+    [:S [:not-B "a" "a" "a" " " "1" "2" "3"] [:B "b" "b" "b"]]
+
+
     (insta/parses eat-a "aaaaaaaabbbbbb" :total true)
     '({:tag :Aeater, :content ("a" "a" "a" "a" "a" "a" "a" "a" {:tag :instaparse/failure, :content ("bbbbbb")})})