From 43f0b3e437fc942b09fa94e53bf5fda86c872d49 Mon Sep 17 00:00:00 2001 From: sam atman Date: Thu, 15 Aug 2013 17:29:44 -0700 Subject: [PATCH 1/5] re-parse working --- edn.grammar | 22 +++++++++++++ src/instaparse/re_parse.clj | 65 +++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 edn.grammar create mode 100644 src/instaparse/re_parse.clj diff --git a/edn.grammar b/edn.grammar new file mode 100644 index 0000000..3f921a2 --- /dev/null +++ b/edn.grammar @@ -0,0 +1,22 @@ + +
<form> = object + | vector + | symbol + | keyword + | WS* form WS* ; + +object = WS* "{" WS* pair* "}" WS* ; + +vector = WS* "[" WS* form* "]" WS* ; + +<pair> = key val ; + +key = form ; + +val = form ; + +symbol = #"[A-Za-z]+" ; (* a conventional start *) + +<WS> = #"[\s,]+" ; (* commas are whitespace *) + +keyword = #"[:][A-Za-z]+" ; \ No newline at end of file diff --git a/src/instaparse/re_parse.clj b/src/instaparse/re_parse.clj new file mode 100644 index 0000000..a78d1b1 --- /dev/null +++ b/src/instaparse/re_parse.clj @@ -0,0 +1,65 @@ +(ns instaparse.re-parse + (:require [instaparse.core :as insta])) + +(defn- e-tree-seq + "tree-seqs enlive trees/graphs, at least instaparse ones" + [e-tree] + (if (map? (first e-tree)) + (tree-seq (comp seq :content) :content (first e-tree)) + (tree-seq (comp seq :content) :content e-tree))) + +(defn- flatten-enlive + "flattens an enlive tree (instaparse dialect)" + [tree] + (apply str (filter string? (e-tree-seq tree)))) + +(defn- flatten-hiccup + "flattens a hiccup tree (instparse dialect)" + [tree] + (apply str (filter string? (flatten tree)))) + +(defn re-parse + "[parser tree] + reparses a instaparse tree with the given parser." + [parser tree] + (if (vector? tree) + (insta/parse parser (flatten-hiccup tree)) + (insta/parse parser (flatten-enlive tree)))) + +;;;;;;;;;;;;;;;;; +; Demonstration ; +;;;;;;;;;;;;;;;;; + + +(def ^:private m-enl + "simple tree parser" + (insta/parser "tree: node* + node: leaf | '(' node (<'('> node <')'>)* node* ')' + leaf: #'a+' + " :output-format :enlive)) +(def ^:private edn-enl + "simple edn parser" + (insta/parser (slurp "edn.grammar") :output-format :enlive)) + +(def ^:private m-hic + (insta/parser "tree: node* + node: leaf | '(' node (<'('> node <')'>)* node* ')' + leaf: #'a+' ; ")) + +(= (flatten-hiccup (m-hic "a(a(a)a)a")) + "a(a(a)a)a") ; true +(= (flatten-enlive (m-enl "a(a)a")) ; <-this is a map + "a(a)a") ; true +(= (flatten-enlive (edn-enl "{:foo bar}")) ; <-this is a list w. 
a map in it + "{:foo bar}") ; true + + + +(= (re-parse m-hic (m-hic "a(a)a")) + (m-hic "a(a)a")) ;true +(= (re-parse m-enl (m-enl "a(a)a")) + (m-enl "a(a)a")) ; true +(= (re-parse m-enl (m-hic "a(a)a")) + (m-enl "a(a)a")) ; true + + \ No newline at end of file From 2e73e1fb0f478b19848fce9355dd6bebc7a9f122 Mon Sep 17 00:00:00 2001 From: sam atman Date: Thu, 15 Aug 2013 19:13:48 -0700 Subject: [PATCH 2/5] hmm --- src/instaparse/re_parse.clj | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/instaparse/re_parse.clj b/src/instaparse/re_parse.clj index a78d1b1..da444e2 100644 --- a/src/instaparse/re_parse.clj +++ b/src/instaparse/re_parse.clj @@ -18,14 +18,30 @@ [tree] (apply str (filter string? (flatten tree)))) -(defn re-parse +(defn- re-parse-tree "[parser tree] reparses a instaparse tree with the given parser." [parser tree] (if (vector? tree) (insta/parse parser (flatten-hiccup tree)) (insta/parse parser (flatten-enlive tree)))) - + + +(defn re-parse + "[parser tree (:rule)] + Re-parse an instaparse tree with a parser + If :rule is given, re-parse only those nodes matching + :rule." + ([parser tree] + (if (vector? tree) + (insta/parse parser (flatten-hiccup tree)) + (insta/parse parser (flatten-enlive tree)))) + ([parser tree rule] + (if (vector? 
tree) + (insta/transform {rule (fn [node] [:node (re-parse-tree parser node)])} tree) + (insta/parse parser (flatten-enlive tree))))) + + ;;;;;;;;;;;;;;;;; ; Demonstration ; ;;;;;;;;;;;;;;;;; @@ -37,8 +53,9 @@ node: leaf | '(' node (<'('> node <')'>)* node* ')' leaf: #'a+' " :output-format :enlive)) + (def ^:private edn-enl - "simple edn parser" + "toy edn parser" (insta/parser (slurp "edn.grammar") :output-format :enlive)) (def ^:private m-hic @@ -56,10 +73,12 @@ (= (re-parse m-hic (m-hic "a(a)a")) - (m-hic "a(a)a")) ;true + (m-hic "a(a)a")) ; true (= (re-parse m-enl (m-enl "a(a)a")) - (m-enl "a(a)a")) ; true + (m-enl "a(a)a")) ; true (= (re-parse m-enl (m-hic "a(a)a")) - (m-enl "a(a)a")) ; true + (m-enl "a(a)a")) ; true + (= (re-parse m-hic (m-hic "a(a)a") :tree) + (m-hic "a(a)a")) \ No newline at end of file From 3f8e4d5962c9efcc538072aa134f7235e3edebe7 Mon Sep 17 00:00:00 2001 From: sam atman Date: Thu, 15 Aug 2013 19:25:34 -0700 Subject: [PATCH 3/5] hiccup works --- src/instaparse/re_parse.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/instaparse/re_parse.clj b/src/instaparse/re_parse.clj index da444e2..0aa021c 100644 --- a/src/instaparse/re_parse.clj +++ b/src/instaparse/re_parse.clj @@ -38,7 +38,7 @@ (insta/parse parser (flatten-enlive tree)))) ([parser tree rule] (if (vector? tree) - (insta/transform {rule (fn [node] [:node (re-parse-tree parser node)])} tree) + (insta/transform {rule (fn [& node] (re-parse-tree parser [:rule node]))} tree) (insta/parse parser (flatten-enlive tree))))) From 3db945e8d529066cae28fdeef6aabb9632f311fb Mon Sep 17 00:00:00 2001 From: sam atman Date: Thu, 15 Aug 2013 19:32:03 -0700 Subject: [PATCH 4/5] re-parse works w. 
both hiccup and enlive --- src/instaparse/re_parse.clj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/instaparse/re_parse.clj b/src/instaparse/re_parse.clj index 0aa021c..764f702 100644 --- a/src/instaparse/re_parse.clj +++ b/src/instaparse/re_parse.clj @@ -38,8 +38,8 @@ (insta/parse parser (flatten-enlive tree)))) ([parser tree rule] (if (vector? tree) - (insta/transform {rule (fn [& node] (re-parse-tree parser [:rule node]))} tree) - (insta/parse parser (flatten-enlive tree))))) + (insta/transform {rule (fn [& node] (re-parse-tree parser [rule node]))} tree) + (insta/transform {rule (fn [& node] (re-parse-tree parser {:tag rule, :content node}))} tree)))) ;;;;;;;;;;;;;;;;; From 97fe28401b03f6d5b2d6244c36b80517ee44fc4f Mon Sep 17 00:00:00 2001 From: sam atman Date: Thu, 15 Aug 2013 20:13:19 -0700 Subject: [PATCH 5/5] one less fn --- src/instaparse/re_parse.clj | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/src/instaparse/re_parse.clj b/src/instaparse/re_parse.clj index 764f702..a5027cd 100644 --- a/src/instaparse/re_parse.clj +++ b/src/instaparse/re_parse.clj @@ -17,15 +17,6 @@ "flattens a hiccup tree (instparse dialect)" [tree] (apply str (filter string? (flatten tree)))) - -(defn- re-parse-tree - "[parser tree] - reparses a instaparse tree with the given parser." - [parser tree] - (if (vector? tree) - (insta/parse parser (flatten-hiccup tree)) - (insta/parse parser (flatten-enlive tree)))) - (defn re-parse "[parser tree (:rule)] @@ -38,8 +29,8 @@ (insta/parse parser (flatten-enlive tree)))) ([parser tree rule] (if (vector? 
tree) - (insta/transform {rule (fn [& node] (re-parse-tree parser [rule node]))} tree) - (insta/transform {rule (fn [& node] (re-parse-tree parser {:tag rule, :content node}))} tree)))) + (insta/transform {rule (fn [& node] (re-parse parser [rule node]))} tree) + (insta/transform {rule (fn [& node] (re-parse parser {:tag rule, :content node}))} tree)))) ;;;;;;;;;;;;;;;;; @@ -54,7 +45,7 @@ leaf: #'a+' " :output-format :enlive)) -(def ^:private edn-enl +(def ^:private edn-enl ; "toy edn parser" (insta/parser (slurp "edn.grammar") :output-format :enlive)) @@ -73,12 +64,13 @@ (= (re-parse m-hic (m-hic "a(a)a")) - (m-hic "a(a)a")) ; true + (m-hic "a(a)a")) ; true (= (re-parse m-enl (m-enl "a(a)a")) - (m-enl "a(a)a")) ; true + (m-enl "a(a)a")) ; true (= (re-parse m-enl (m-hic "a(a)a")) - (m-enl "a(a)a")) ; true - (= (re-parse m-hic (m-hic "a(a)a") :tree) - (m-hic "a(a)a")) - + (m-enl "a(a)a")) ; true +(= (re-parse m-hic (m-hic "a(a)a") :tree) + (m-hic "a(a)a")) ; true +(= (re-parse m-enl (m-enl "a(a)a") :tree) + (m-enl "a(a)a")) ; true \ No newline at end of file