From 94cc8b1b69825257a3e0b4fc17135f1b1453ff8c Mon Sep 17 00:00:00 2001 From: Joel Martin Date: Sun, 18 Mar 2018 14:57:24 -0500 Subject: [PATCH] Add path log metadata to top of parse result. When a Tramp record is created by make-tramp, the grammar tree is walked and the get-in style path is attached as metadata to each node in the grammar tree. Also a new :path-log atom is added to the Tramp record. In push-result (during parsing) the current path is retrieved from the metadata of node in the grammar tree being processed. The path is then added to the :path-log of the tramp object. When the final result is returned, the :path-log from the tramp object is added to the metadata of the top of the result returned. --- src/instaparse/gll.cljc | 33 ++++++++++-- test/instaparse/core_test.cljc | 4 +- test/instaparse/path_log.cljc | 93 ++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 7 deletions(-) create mode 100644 test/instaparse/path_log.cljc diff --git a/src/instaparse/gll.cljc b/src/instaparse/gll.cljc index 0290128..7f168dd 100644 --- a/src/instaparse/gll.cljc +++ b/src/instaparse/gll.cljc @@ -108,7 +108,7 @@ ; In diagnostic messages, how many characters ahead do we want to show. (def ^:dynamic *diagnostic-char-lookahead* 10) -(declare sub-sequence string-context) +(declare sub-sequence string-context merge-meta) #?(:clj (defn string-context [^CharSequence text index] @@ -218,6 +218,22 @@ :cljs (def sub-sequence subs)) +(defn with-path-meta + [g] + (let [gfn (fn gfn [root path] + (vary-meta + (if (:parser root) + (assoc root + :parser (gfn (:parser root) (conj path (:tag root)))) + (if (:parsers root) + (assoc root + :parsers (map-indexed #(gfn %2 (conj path (:tag root) %1)) + (:parsers root))) + root)) + assoc :path path))] + (into {} (for [[nt exp] g] + [nt (gfn exp [nt])])))) + ; The trampoline structure contains the grammar, text to parse, a stack and a nodes ; Also contains an atom to hold successes and one to hold index of failure point. ; grammar is a map from non-terminals to parsers @@ -229,16 +245,18 @@ (defrecord Tramp [grammar text segment fail-index node-builder stack next-stack generation negative-listeners - msg-cache nodes success failure trace?]) + msg-cache nodes success failure trace? + path-log]) (defn make-tramp ([grammar text] (make-tramp grammar text (text->segment text) -1 nil)) ([grammar text segment] (make-tramp grammar text segment -1 nil)) ([grammar text fail-index node-builder] (make-tramp grammar text (text->segment text) fail-index node-builder)) ([grammar text segment fail-index node-builder] - (Tramp. grammar text segment + (Tramp. (with-path-meta grammar) text segment fail-index node-builder (atom []) (atom []) (atom 0) (atom (sorted-map-by >)) - (atom {}) (atom {}) (atom nil) (atom (Failure. 0 [])) (trace-or-false)))) + (atom {}) (atom {}) (atom nil) (atom (Failure. 0 [])) (trace-or-false) + (atom [])))) ; A Success record contains the result and the index to continue from (defn make-success [result index] {:result result :index index}) @@ -360,6 +378,8 @@ total? (total-success? tramp result) results (if total? (:full-results node) (:results node))] (when (not (@results result)) ; when result is not already in @results + (when-let [path (:path (meta parser))] + (swap! (:path-log tramp) conj path)) (profile (add! :push-result)) (swap! results conj result) (doseq [listener @(:listeners node)] @@ -457,7 +477,10 @@ (cond @(:success tramp) (do (log tramp "Successful parse.\nProfile: " @stats) - (cons (:result @(:success tramp)) + (cons (let [obj (:result @(:success tramp))] + (if (coll? obj) + (merge-meta obj {:path-log @(:path-log tramp)}) + obj)) (lazy-seq (do (reset! (:success tramp) nil) (run tramp true))))) diff --git a/test/instaparse/core_test.cljc b/test/instaparse/core_test.cljc index a8ad3c5..ae59728 100644 --- a/test/instaparse/core_test.cljc +++ b/test/instaparse/core_test.cljc @@ -793,12 +793,12 @@ (defn hiccup-line-col-spans [t] (if (sequential? t) - (cons (meta t) (map hiccup-line-col-spans (next t))) + (cons (dissoc (meta t) :path-log) (map hiccup-line-col-spans (next t))) t)) (defn enlive-line-col-spans [t] (if (map? t) - (cons (meta t) (map enlive-line-col-spans (:content t))) + (cons (dissoc (meta t) :path-log) (map enlive-line-col-spans (:content t))) t)) (deftest line-col-test diff --git a/test/instaparse/path_log.cljc b/test/instaparse/path_log.cljc new file mode 100644 index 0000000..6926819 --- /dev/null +++ b/test/instaparse/path_log.cljc @@ -0,0 +1,93 @@ +(ns instaparse.path-log + (:require + #?(:clj [clojure.test :refer [deftest is]] + :cljs [cljs.test :as t]) + #?(:clj [instaparse.core :as insta] + :cljs [instaparse.core :as insta])) + #?(:cljs (:require-macros + [cljs.test :refer [is deftest]]))) + +(def simple-parser + (insta/parser + "TOP = R1+ \"\\n\" + R1 = ( 'foo' | 'bar' | R3 )+ + | 'baz' + | R2 + R2 = 'qux' + | 'quux' + | 'quuux' + R3 = 'z'")) + +(def text1 + "foo\n") + +(def text2 + "barquuxz\n") + +(def text3 + "quxquxquuxquuxquuuxquuux\n") + +(deftest path-log-tests + (let [res1 (simple-parser text1) + path-freqs1 (->> res1 meta :path-log frequencies) + res2 (simple-parser text2) + path-freqs2 (->> res2 meta :path-log frequencies) + res3 (simple-parser text3) + path-freqs3 (->> res3 meta :path-log frequencies)] + + (is (= res1 + [:TOP + [:R1 "foo"] + "\n"])) + (is (= path-freqs1 + {[:TOP] 1, + [:TOP :cat 0] 1, + [:TOP :cat 0 :plus] 1, + [:TOP :cat 1] 1, + [:R1] 1, + [:R1 :alt 0] 1, + [:R1 :alt 0 :plus] 1, + [:R1 :alt 0 :plus :alt 0] 1})) + + (is (= res2 + [:TOP + [:R1 "bar"] + [:R1 [:R2 "quux"]] + [:R1 [:R3 "z"]] + "\n"])) + (is (= path-freqs2 + {[:TOP] 1, + [:TOP :cat 0] 3, + [:TOP :cat 0 :plus] 3, + [:TOP :cat 1] 1, + [:R1] 3, + [:R1 :alt 0] 2, + [:R1 :alt 0 :plus] 2, + [:R1 :alt 0 :plus :alt 1] 1, + [:R1 :alt 0 :plus :alt 2] 1, + [:R1 :alt 2] 1, + [:R2] 1, + [:R2 :alt 1] 1, + [:R3] 1})) + + (is (= res3 + [:TOP + [:R1 [:R2 "qux"]] + [:R1 [:R2 "qux"]] + [:R1 [:R2 "quux"]] + [:R1 [:R2 "quux"]] + [:R1 [:R2 "quuux"]] + [:R1 [:R2 "quuux"]] + "\n"])) + (is (= path-freqs3 + {[:TOP] 1, + [:TOP :cat 0] 6, + [:TOP :cat 0 :plus] 6, + [:TOP :cat 1] 1, + [:R1] 6, + [:R1 :alt 2] 6, + [:R2] 6, + [:R2 :alt 0] 2, + [:R2 :alt 1] 2, + [:R2 :alt 2] 2})))) +