pinterest · fishcakez · Dec 13, 2018 · jparise · Dec 13, 2018 · jparise
@@ -0,0 +1,189 @@
+defmodule Thrift.Parser.Nimble do
+  import NimbleParsec
+
+  defmodule Lexer do
+    @moduledoc false
+    @punctuator ~c"(){}[]<>,;=*"
+
+    @keywords ~w(
+      namespace include cpp_include
+      typedef enum union struct exception
+      void bool byte i8 i16 i32 i64 double string binary list map set
+      const oneway extends throws service required optional
+      true false
+    )
+
+    # Builds the combinator for a single lexical token, appended to `combinator`.
+    # The alternatives are handed to `choice/2` in the order listed, so keywords
+    # are attempted before identifiers and whitespace is attempted last.
+    def token(combinator \\ empty()) do
+      alternatives = [
+        keyword(),
+        identifier(),
+        literal(),
+        number(),
+        punctuator(),
+        whitespace()
+      ]
+
+      choice(combinator, alternatives)
+    end
+
+    # Matches a run of one or more whitespace bytes (space, tab, newline,
+    # vertical tab, form feed, carriage return) and drops it from the output.
+    defp whitespace() do
+      blanks = ascii_string([?\s, ?\t, ?\n, ?\v, ?\f, ?\r], min: 1)
+
+      label(ignore(blanks), "whitespace")
+    end
+
+    # Matches any word from @keywords and emits it as an atom.
+    # NOTE(review): each keyword is matched as a plain string with no
+    # word-boundary check, and token/1 tries keywords before identifiers, so an
+    # identifier that merely starts with a keyword (e.g. "truex") would
+    # presumably lex as the keyword followed by leftover input - confirm
+    # whether that is intended.
+    defp keyword() do
+      alternatives =
+        for word <- @keywords do
+          replace(string(word), String.to_atom(word))
+        end
+
+      label(choice(alternatives), "keyword")
+    end
+
+    # Matches any single character from @punctuator and emits it as a
+    # one-character atom (e.g. "{" becomes :"{").
+    defp punctuator() do
+      alternatives =
+        for char <- @punctuator do
+          symbol = <<char>>
+          replace(string(symbol), String.to_atom(symbol))
+        end
+
+      label(choice(alternatives), "punctuator")
+    end
+
+    # Matches a string literal delimited by either double or single quotes and
+    # collapses the collected codepoints into one binary.
+    defp literal() do
+      quoted = choice([literal_with(?"), literal_with(?')])
+
+      quoted
+      |> reduce({List, :to_string, []})
+      |> label("literal")
+    end
+
+    # Builds the combinator for a string literal that opens and closes with
+    # `char`. The delimiters are dropped; the codepoints between them are
+    # emitted one by one (literal/0 joins them into a binary).
+    #
+    # Escape handling follows the rules of the previous leex-based lexer:
+    # a backslash escapes the character after it, "\n", "\r" and "\t" expand to
+    # newline, carriage return and tab, and any other escaped character
+    # (including the delimiter and the backslash itself) yields that character
+    # unchanged. Unknown escapes are deliberately accepted rather than reported.
+    defp literal_with(char) do
+      delim = ascii_char([char])
+
+      # The backslash is consumed but not emitted; only the expansion is kept.
+      escaped =
+        ignore(utf8_char([?\\]))
+        |> choice([
+          replace(ascii_char([?n]), ?\n),
+          replace(ascii_char([?r]), ?\r),
+          replace(ascii_char([?t]), ?\t),
+          utf8_char([])
+        ])
+
+      delim
+      |> ignore()
+      |> concat(
+        choice([
+          escaped,
+          # Also covers a trailing backslash with nothing valid after it.
+          utf8_char([]),
+          error(eos(), "expected literal delimiter ?#{[char]}"),
+          error(empty(), "expected utf8 codepoint")
+        ])
+        |> repeat_until([delim])
+      )
+      |> ignore(delim)
+    end
+
+    # Matches an optionally signed number. Once a sign has been consumed a
+    # number must follow, otherwise the parse fails with "expected number".
+    # __sign__/5 applies the sign to the parsed value.
+    defp number(combinator \\ empty()) do
+      missing_number = error(empty(), "expected number")
+
+      signed =
+        [?-, ?+]
+        |> ascii_char()
+        |> choice([unsigned_number(), missing_number])
+        |> post_traverse({__MODULE__, :__sign__, []})
+
+      combinator
+      |> choice([signed, unsigned_number()])
+      |> label("number")
+    end
+
+    # Matches an unsigned hex integer, decimal integer or double.
+    #
+    # Results handed to __number__/5 (most recent first):
+    #   * integer / hex              -> [int]
+    #   * int "." fraction           -> [fraction, int]
+    #   * with exponent              -> [exponent, sign, fraction, int]
+    #
+    # The fraction is captured as a digit *string*, not via integer/2: parsing
+    # it as an integer would drop leading zeros and turn "1.05" into 1.5.
+    # __number__/5 only interpolates the fraction, so a string works unchanged.
+    defp unsigned_number() do
+      choice([
+        hex(),
+        integer(min: 1)
+        |> choice([
+          ignore(ascii_char([?.]))
+          |> ascii_string([?0..?9], min: 1)
+          |> optional(ignore(ascii_char([?E, ?e])) |> exponent()),
+          # No fraction but an exponent ("1e0"): supply a zero fraction.
+          empty()
+          |> replace(0)
+          |> ignore(ascii_char([?E, ?e]))
+          |> exponent(),
+          empty()
+        ])
+      ])
+      # A "." still present here was not followed by digits.
+      |> optional(
+        ascii_char([?.])
+        |> ignore()
+        |> error(empty(), "expected integer fraction for significand")
+      )
+      |> post_traverse({__MODULE__, :__number__, []})
+    end
+
+    # Traversal callback for a signed number. `acc` holds the parsed number
+    # followed by the sign character; the result is the number negated for ?-
+    # and unchanged for ?+.
+    def __sign__(_rest, acc, context, _line, _offset) do
+      signed =
+        case acc do
+          [magnitude, ?+] -> magnitude
+          [magnitude, ?-] -> -magnitude
+        end
+
+      {[signed], context}
+    end
+
+    # Matches a "0x"-prefixed hexadecimal integer and emits its integer value.
+    # The prefix commits the parse: "0x" without a hex digit after it is an error.
+    defp hex() do
+      digits =
+        [?0..?9, ?a..?f, ?A..?F]
+        |> ascii_string(min: 1)
+        |> map({String, :to_integer, [16]})
+
+      missing_digit = error(empty(), "expected hexidecimal digit")
+
+      "0x"
+      |> string()
+      |> ignore()
+      |> choice([digits, missing_digit])
+    end
+
+    # Appends the exponent part (after the already-consumed "e"/"E") to
+    # `combinator`: an optional sign, defaulting to ?+, followed by at least one
+    # digit. Emits the sign character and then the integer exponent; anything
+    # else fails with "expected integer exponent".
+    defp exponent(combinator) do
+      sign = choice([ascii_char([?-, ?+]), replace(empty(), ?+)])
+      signed_digits = integer(sign, min: 1)
+      missing_exponent = error(empty(), "expected integer exponent")
+
+      choice(combinator, [signed_digits, missing_exponent])
+    end
+
+    # Traversal callback that turns the pieces collected by unsigned_number/0
+    # into one value. `acc` is most-recent-first:
+    #   [int]                              -> left as is
+    #   [fraction, int]                    -> float "int.fraction"
+    #   [exponent, sign, fraction, int]    -> float "int.fractionE<sign>exponent"
+    def __number__(_rest, acc, context, _line, _offset) do
+      case acc do
+        [_integer] ->
+          {acc, context}
+
+        [frac, whole] ->
+          text = "#{whole}.#{frac}"
+          {[String.to_float(text)], context}
+
+        [exp, exp_sign, frac, whole] ->
+          # exp_sign is a character code; wrap it in a list to print the char.
+          text = "#{whole}.#{frac}e#{[exp_sign]}#{exp}"
+          {[String.to_float(text)], context}
+      end
+    end
+
+    # Matches a possibly dotted identifier and emits it as one list of atoms,
+    # one atom per dot-separated segment ("hello.world" -> [:hello, :world]).
+    # Each segment starts with a letter or underscore and continues with
+    # letters, digits or underscores. A dot must be followed by another segment.
+    def identifier() do
+      head = [?a..?z, ?A..?Z, ?_]
+      tail = [?a..?z, ?A..?Z, ?_, ?0..?9]
+
+      segment =
+        head
+        |> ascii_char()
+        |> repeat(ascii_char(tail))
+        |> reduce({List, :to_atom, []})
+
+      missing_segment =
+        error(empty(), "expected alphabetic character or underscore to continue identifier")
+
+      dotted_segment =
+        [?.]
+        |> ascii_char()
+        |> ignore()
+        |> choice([segment, missing_segment])
+
+      segment
+      |> optional(repeat(dotted_segment))
+      |> wrap()
+      |> label("identifier")
+    end
+
+    # Appends `to_error` to `combinator` with __error__/6 registered as its
+    # traversal callback, carrying `label` as the extra argument.
+    defp error(combinator \\ empty(), to_error, label) do
+      handler = {__MODULE__, :__error__, [label]}
+      pre_traverse(combinator, to_error, handler)
+    end
+
+    # Traversal callback installed by error/3: ignores the parser state and
+    # always returns an error tuple carrying the given message.
+    def __error__(_rest, _acc, _context, _line, _offset, message), do: {:error, message}
+  end
+
+  defparsec(:parse_token, Lexer.token())
+end
@@ -83,6 +83,10 @@ defmodule Thrift.Mixfile do
       {:credo, "~> 1.0", only: :dev, runtime: false},
       {:dialyxir, "~> 0.5", only: :dev, runtime: false},
 
+      # Compile
+      {:nimble_parsec, "~> 0.4",
+       github: "plataformatec/nimble_parsec", runtime: false, override: true},
+
       # Runtime
       {:connection, "~> 1.0"},
       {:ranch, "~> 1.6"}

@@ -16,7 +16,7 @@
   "makeup_elixir": {:hex, :makeup_elixir, "0.8.0", "1204a2f5b4f181775a0e456154830524cf2207cf4f9112215c05e0b76e4eca8b", [:mix], [{:makeup, "~> 0.5.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 0.2.2", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"},
   "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
   "mimerl": {:hex, :mimerl, "1.0.2", "993f9b0e084083405ed8252b99460c4f0563e41729ab42d9074fd5e52439be88", [:rebar3], [], "hexpm"},
-  "nimble_parsec": {:hex, :nimble_parsec, "0.2.2", "d526b23bdceb04c7ad15b33c57c4526bf5f50aaa70c7c141b4b4624555c68259", [:mix], [], "hexpm"},
+  "nimble_parsec": {:git, "https://github.com/plataformatec/nimble_parsec.git", "40c7d9b6049158911e456a51cee6b9b5543f1d0d", []},
   "parse_trans": {:hex, :parse_trans, "3.2.0", "2adfa4daf80c14dc36f522cf190eb5c4ee3e28008fc6394397c16f62a26258c2", [:rebar3], [], "hexpm"},
   "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm"},
   "ranch": {:hex, :ranch, "1.6.2", "6db93c78f411ee033dbb18ba8234c5574883acb9a75af0fb90a9b82ea46afa00", [:rebar3], [], "hexpm"},

@@ -0,0 +1,93 @@
+defmodule Thrift.Parser.NimbleTest do
+  use ExUnit.Case, async: true
+  import Thrift.Parser.Nimble, only: [parse_token: 1]
+
+  # Every assertion compares against the full tuple returned by the generated
+  # parser. As exercised below, the shapes are:
+  #
+  #   {:ok, tokens, rest, context, {line, line_offset}, byte_offset}
+  #   {:error, message, rest, context, {line, line_offset}, byte_offset}
+  #
+  # `rest` is the unconsumed input and `byte_offset` is how far the parser got.
+  describe "parse_token/1" do
+    test "returns ok on integer" do
+      assert parse_token("111") == {:ok, [111], "", %{}, {1, 0}, 3}
+      assert parse_token("-111") == {:ok, [-111], "", %{}, {1, 0}, 4}
+      assert parse_token("+111") == {:ok, [111], "", %{}, {1, 0}, 4}
+    end
+
+    test "returns error on invalid partial number" do
+      assert parse_token("-A") == {:error, "expected number", "A", %{}, {1, 0}, 1}
+      assert parse_token("+") == {:error, "expected number", "", %{}, {1, 0}, 1}
+    end
+
+    test "returns ok on hex" do
+      assert parse_token("0x1F") == {:ok, [31], "", %{}, {1, 0}, 4}
+      assert parse_token("-0x1a2") == {:ok, [-418], "", %{}, {1, 0}, 6}
+      assert parse_token("+0x0FF0") == {:ok, [4080], "", %{}, {1, 0}, 7}
+    end
+
+    test "returns error on invalid partial hex" do
+      assert parse_token("0xG") == {:error, "expected hexidecimal digit", "G", %{}, {1, 0}, 2}
+      assert parse_token("0x") == {:error, "expected hexidecimal digit", "", %{}, {1, 0}, 2}
+    end
+
+    test "returns ok on double" do
+      assert parse_token("0.0") == {:ok, [0.0], "", %{}, {1, 0}, 3}
+      assert parse_token("-1.0") == {:ok, [-1.0], "", %{}, {1, 0}, 4}
+      assert parse_token("+1.0") == {:ok, [1.0], "", %{}, {1, 0}, 4}
+      assert parse_token("1e0") == {:ok, [1.0], "", %{}, {1, 0}, 3}
+      assert parse_token("-2E1") == {:ok, [-20.0], "", %{}, {1, 0}, 4}
+      assert parse_token("+3.2e1") == {:ok, [32.0], "", %{}, {1, 0}, 6}
+      assert parse_token("43.2E-1") == {:ok, [4.32], "", %{}, {1, 0}, 7}
+      assert parse_token("-5.432E+1") == {:ok, [-54.32], "", %{}, {1, 0}, 9}
+    end
+
+    test "returns error on invalid partial double" do
+      assert parse_token("0.a") ==
+               {:error, "expected integer fraction for significand", "a", %{}, {1, 0}, 2}
+
+      assert parse_token("1.") ==
+               {:error, "expected integer fraction for significand", "", %{}, {1, 0}, 2}
+
+      assert parse_token("0e!") == {:error, "expected integer exponent", "!", %{}, {1, 0}, 2}
+      assert parse_token("0E+e") == {:error, "expected integer exponent", "+e", %{}, {1, 0}, 2}
+      assert parse_token("0E") == {:error, "expected integer exponent", "", %{}, {1, 0}, 2}
+    end
+
+    test "returns ok on literal" do
+      assert parse_token(~s("hi")) == {:ok, ["hi"], "", %{}, {1, 0}, 4}
+      assert parse_token(~s('hello')) == {:ok, ["hello"], "", %{}, {1, 0}, 7}
+      assert parse_token(~s("hi 'world'")) == {:ok, ["hi 'world'"], "", %{}, {1, 0}, 12}
+      assert parse_token(~s("hi \\"world\\"")) == {:ok, ["hi \"world\""], "", %{}, {1, 0}, 14}
+      assert parse_token(~s('hello \\'world\\'')) == {:ok, ["hello 'world'"], "", %{}, {1, 0}, 17}
+    end
+
+    test "returns error on invalid partial literal" do
+      assert parse_token(~s("hi)) ==
+               {:error, "expected literal delimiter ?\"", "", %{}, {1, 0}, 3}
+
+      # <<128>> is a lone continuation byte, i.e. not a valid UTF-8 codepoint.
+      assert parse_token(~s("hello) <> <<128>>) ==
+               {:error, "expected utf8 codepoint", <<128>>, %{}, {1, 0}, 6}
+    end
+
+    test "returns ok on identifier" do
+      assert parse_token("hi") == {:ok, [[:hi]], "", %{}, {1, 0}, 2}
+      assert parse_token("Hello") == {:ok, [[:Hello]], "", %{}, {1, 0}, 5}
+      assert parse_token("_hey") == {:ok, [[:_hey]], "", %{}, {1, 0}, 4}
+      assert parse_token("hello.world") == {:ok, [[:hello, :world]], "", %{}, {1, 0}, 11}
+    end
+
+    test "returns error on invalid partial identifier" do
+      assert parse_token("hi.0") ==
+               {:error, "expected alphabetic character or underscore to continue identifier", "0",
+                %{}, {1, 0}, 3}
+
+      assert parse_token("Hello.!") ==
+               {:error, "expected alphabetic character or underscore to continue identifier", "!",
+                %{}, {1, 0}, 6}
+    end
+
+    # Whitespace is consumed but produces no token, hence the empty token list.
+    test "returns ok on whitespace" do
+      assert parse_token(" hi") == {:ok, [], "hi", %{}, {1, 0}, 1}
+      assert parse_token("\nhey") == {:ok, [], "hey", %{}, {2, 1}, 1}
+      assert parse_token("\thello") == {:ok, [], "hello", %{}, {1, 0}, 1}
+      assert parse_token("\vheya") == {:ok, [], "heya", %{}, {1, 0}, 1}
+      assert parse_token("\rhiya") == {:ok, [], "hiya", %{}, {1, 0}, 1}
+      assert parse_token("\fyo") == {:ok, [], "yo", %{}, {1, 0}, 1}
+    end
+  end
+end