diff --git a/lib/thrift/parser/nimble.ex b/lib/thrift/parser/nimble.ex
new file mode 100644
index 00000000..4c74a142
--- /dev/null
+++ b/lib/thrift/parser/nimble.ex
@@ -0,0 +1,230 @@
+defmodule Thrift.Parser.Nimble do
+  @moduledoc """
+  Tokenizer for Thrift IDL source built on NimbleParsec.
+
+  `parse_token/1` consumes a single token from the start of the given binary.
+  """
+
+  import NimbleParsec
+
+  defmodule Lexer do
+    @moduledoc false
+
+    # Single-character tokens; each is emitted as the matching atom.
+    @punctuator ~c"(){}[]<>,;=*"
+
+    # Reserved words; each is emitted as the matching atom.
+    @keywords ~w(
+      namespace include cpp_include
+      typedef enum union struct exception
+      void bool byte i8 i16 i32 i64 double string binary list map set
+      const oneway extends throws service required optional
+      true false
+    )
+
+    # Combinator matching exactly one token. Alternatives are tried in order, so
+    # keywords win over identifiers.
+    # NOTE(review): keywords are matched as plain prefixes, so input such as
+    # `includes` yields `:include` with `s` left over -- confirm whether a
+    # trailing identifier-character check is wanted here.
+    def token(combinator \\ empty()) do
+      choice(combinator, [
+        keyword(),
+        identifier(),
+        literal(),
+        number(),
+        punctuator(),
+        whitespace()
+      ])
+    end
+
+    # A run of ASCII whitespace; consumed but emits nothing.
+    defp whitespace() do
+      ascii_string([?\s, ?\t, ?\n, ?\v, ?\f, ?\r], min: 1)
+      |> ignore()
+      |> label("whitespace")
+    end
+
+    # String.to_atom/1 is safe here: it only runs over the fixed @keywords list
+    # while the combinator is being built.
+    defp keyword() do
+      @keywords
+      |> Enum.map(&(string(&1) |> replace(String.to_atom(&1))))
+      |> choice()
+      |> label("keyword")
+    end
+
+    defp punctuator() do
+      @punctuator
+      |> Enum.map(&(string(<<&1>>) |> replace(String.to_atom(<<&1>>))))
+      |> choice()
+      |> label("punctuator")
+    end
+
+    # A single- or double-quoted literal, emitted as a binary without the quotes.
+    defp literal() do
+      choice([
+        literal_with(?"),
+        literal_with(?')
+      ])
+      |> reduce({List, :to_string, []})
+      |> label("literal")
+    end
+
+    # Body of a literal delimited by `char`. A backslash directly before the
+    # delimiter is dropped so the delimiter becomes part of the literal; any
+    # other backslash is kept verbatim.
+    defp literal_with(char) do
+      delim = ascii_char([char])
+
+      delim
+      |> ignore()
+      |> concat(
+        choice([
+          utf8_char([?\\]) |> ignore() |> concat(delim),
+          utf8_char([]),
+          error(eos(), "expected literal delimiter ?#{[char]}"),
+          error(empty(), "expected utf8 codepoint")
+        ])
+        |> repeat_until([delim])
+      )
+      |> ignore(delim)
+    end
+
+    # An optionally signed number. Once a sign has been consumed a number must
+    # follow, so the failure is reported as an error instead of backtracking.
+    defp number(combinator \\ empty()) do
+      combinator
+      |> choice([
+        ascii_char([?-, ?+])
+        |> choice([
+          unsigned_number(),
+          empty()
+          |> error("expected number")
+        ])
+        |> post_traverse({__MODULE__, :__sign__, []}),
+        unsigned_number()
+      ])
+      |> label("number")
+    end
+
+    # Hex integer, decimal integer, or double (`int.fraction`, `int.fractionEexp`
+    # or `intEexp`).
+    defp unsigned_number() do
+      choice([
+        hex(),
+        integer(min: 1)
+        |> choice([
+          ignore(ascii_char([?.]))
+          # Keep the fraction as a digit string: integer/1 would turn "05" into
+          # 5 and "1.05" would come out as 1.5.
+          |> ascii_string([?0..?9], min: 1)
+          |> optional(ignore(ascii_char([?E, ?e])) |> exponent()),
+          # No fraction but an exponent follows: supply a zero fraction so
+          # __number__/5 always sees the same shape.
+          empty()
+          |> replace(0)
+          |> ignore(ascii_char([?E, ?e]))
+          |> exponent(),
+          empty()
+        ])
+      ])
+      # A trailing "." with no digits after it is an error, not the end of token.
+      |> optional(
+        ascii_char([?.])
+        |> ignore()
+        |> error(empty(), "expected integer fraction for significand")
+      )
+      |> post_traverse({__MODULE__, :__number__, []})
+    end
+
+    # post_traverse callback for number/1: applies the leading sign.
+    def __sign__(_rest, acc, context, _line, _offset) do
+      case acc do
+        [number, ?-] ->
+          {[-number], context}
+
+        [number, ?+] ->
+          {[number], context}
+      end
+    end
+
+    # "0x" followed by at least one hex digit, emitted as an integer.
+    defp hex() do
+      string("0x")
+      |> ignore()
+      |> choice([
+        ascii_string([?0..?9, ?a..?f, ?A..?F], min: 1)
+        |> map({String, :to_integer, [16]}),
+        empty()
+        |> error("expected hexadecimal digit")
+      ])
+    end
+
+    # Exponent after "E"/"e": emits the sign character (defaulting to ?+) and
+    # then the exponent integer.
+    defp exponent(combinator) do
+      combinator
+      |> choice([
+        choice([
+          ascii_char([?-, ?+]),
+          empty() |> replace(?+)
+        ])
+        |> integer(min: 1),
+        empty()
+        |> error("expected integer exponent")
+      ])
+    end
+
+    # post_traverse callback for unsigned_number/0. `acc` holds the emitted parts
+    # newest first; integers pass through and doubles are rebuilt as text for
+    # String.to_float/1.
+    def __number__(_rest, acc, context, _line, _offset) do
+      case acc do
+        [_int] ->
+          {acc, context}
+
+        [fraction, int] ->
+          {[String.to_float("#{int}.#{fraction}")], context}
+
+        [exponent, exponent_sign, fraction, int] ->
+          {[String.to_float("#{int}.#{fraction}e#{[exponent_sign]}#{exponent}")], context}
+      end
+    end
+
+    # One or more dot-separated name segments, emitted as a list of atoms.
+    # NOTE(review): List.to_atom/1 creates atoms from the input text; fine for
+    # trusted IDL files, worth revisiting if input can be untrusted.
+    def identifier() do
+      ascii_char([?a..?z, ?A..?Z, ?_])
+      |> repeat(ascii_char([?a..?z, ?A..?Z, ?_, ?0..?9]))
+      |> reduce({List, :to_atom, []})
+      |> optional(
+        repeat(
+          ascii_char([?.])
+          |> ignore()
+          |> choice([
+            ascii_char([?a..?z, ?A..?Z, ?_])
+            |> repeat(ascii_char([?a..?z, ?A..?Z, ?_, ?0..?9]))
+            |> reduce({List, :to_atom, []}),
+            empty()
+            |> error("expected alphabetic character or underscore to continue identifier")
+          ])
+        )
+      )
+      |> wrap()
+      |> label("identifier")
+    end
+
+    # Turns a match of `to_error` into a parse error carrying `label`.
+    defp error(combinator \\ empty(), to_error, label) do
+      pre_traverse(combinator, to_error, {__MODULE__, :__error__, [label]})
+    end
+
+    def __error__(_rest, _acc, _context, _line, _offset, label) do
+      {:error, label}
+    end
+  end
+
+  defparsec(:parse_token, Lexer.token())
+end
diff --git a/mix.exs b/mix.exs
index 6d28b517..6f7c19b8 100644
--- a/mix.exs
+++ b/mix.exs
@@ -83,6 +83,10 @@ defmodule Thrift.Mixfile do
       {:credo, "~> 1.0", only: :dev, runtime: false},
       {:dialyxir, "~> 0.5", only: :dev, runtime: false},
 
+      # Compile
+      {:nimble_parsec, "~> 0.4",
+       github: "plataformatec/nimble_parsec", runtime: false, override: true},
+
       # Runtime
       {:connection, "~> 1.0"},
       {:ranch, "~> 1.6"}
diff --git a/mix.lock b/mix.lock
index 20f1c00d..3726bca1 100644
--- a/mix.lock
+++ b/mix.lock
@@ -16,7 +16,7 @@
   "makeup_elixir": {:hex, :makeup_elixir, "0.8.0", "1204a2f5b4f181775a0e456154830524cf2207cf4f9112215c05e0b76e4eca8b", [:mix], [{:makeup, "~> 0.5.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 0.2.2", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"},
   "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
   "mimerl": {:hex, :mimerl, "1.0.2", "993f9b0e084083405ed8252b99460c4f0563e41729ab42d9074fd5e52439be88", [:rebar3], [], "hexpm"},
-  "nimble_parsec": {:hex, :nimble_parsec, "0.2.2", "d526b23bdceb04c7ad15b33c57c4526bf5f50aaa70c7c141b4b4624555c68259", [:mix], [], "hexpm"},
+  "nimble_parsec": {:git, "https://github.com/plataformatec/nimble_parsec.git", "40c7d9b6049158911e456a51cee6b9b5543f1d0d", []},
   "parse_trans": {:hex, :parse_trans, "3.2.0", "2adfa4daf80c14dc36f522cf190eb5c4ee3e28008fc6394397c16f62a26258c2", [:rebar3], [], "hexpm"},
   "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm"},
   "ranch": {:hex, :ranch, "1.6.2", "6db93c78f411ee033dbb18ba8234c5574883acb9a75af0fb90a9b82ea46afa00", [:rebar3], [], "hexpm"},
diff --git a/test/thrift/parser/nimble_test.exs b/test/thrift/parser/nimble_test.exs
new file mode 100644
index 00000000..0da378fb
--- /dev/null
+++ b/test/thrift/parser/nimble_test.exs
@@ -0,0 +1,99 @@
+defmodule Thrift.Parser.NimbleTest do
+  use ExUnit.Case, async: true
+  import Thrift.Parser.Nimble, only: [parse_token: 1]
+
+  describe "parse_token/1" do
+    test "returns ok on integer" do
+      assert parse_token("111") == {:ok, [111], "", %{}, {1, 0}, 3}
+      assert parse_token("-111") == {:ok, [-111], "", %{}, {1, 0}, 4}
+      assert parse_token("+111") == {:ok, [111], "", %{}, {1, 0}, 4}
+    end
+
+    test "returns error on invalid partial number" do
+      assert parse_token("-A") == {:error, "expected number", "A", %{}, {1, 0}, 1}
+      assert parse_token("+") == {:error, "expected number", "", %{}, {1, 0}, 1}
+    end
+
+    test "returns ok on hex" do
+      assert parse_token("0x1F") == {:ok, [31], "", %{}, {1, 0}, 4}
+      assert parse_token("-0x1a2") == {:ok, [-418], "", %{}, {1, 0}, 6}
+      assert parse_token("+0x0FF0") == {:ok, [4080], "", %{}, {1, 0}, 7}
+    end
+
+    test "returns error on invalid partial hex" do
+      assert parse_token("0xG") == {:error, "expected hexadecimal digit", "G", %{}, {1, 0}, 2}
+      assert parse_token("0x") == {:error, "expected hexadecimal digit", "", %{}, {1, 0}, 2}
+    end
+
+    test "returns ok on double" do
+      assert parse_token("0.0") == {:ok, [0.0], "", %{}, {1, 0}, 3}
+      assert parse_token("-1.0") == {:ok, [-1.0], "", %{}, {1, 0}, 4}
+      assert parse_token("+1.0") == {:ok, [1.0], "", %{}, {1, 0}, 4}
+      assert parse_token("1e0") == {:ok, [1.0], "", %{}, {1, 0}, 3}
+      assert parse_token("-2E1") == {:ok, [-20.0], "", %{}, {1, 0}, 4}
+      assert parse_token("+3.2e1") == {:ok, [32.0], "", %{}, {1, 0}, 6}
+      assert parse_token("43.2E-1") == {:ok, [4.32], "", %{}, {1, 0}, 7}
+      assert parse_token("-5.432E+1") == {:ok, [-54.32], "", %{}, {1, 0}, 9}
+    end
+
+    test "returns ok on double with leading zeros in the fraction" do
+      assert parse_token("1.05") == {:ok, [1.05], "", %{}, {1, 0}, 4}
+      assert parse_token("-0.05") == {:ok, [-0.05], "", %{}, {1, 0}, 5}
+      assert parse_token("3.02e1") == {:ok, [30.2], "", %{}, {1, 0}, 6}
+    end
+
+    test "returns error on invalid partial double" do
+      assert parse_token("0.a") ==
+               {:error, "expected integer fraction for significand", "a", %{}, {1, 0}, 2}
+
+      assert parse_token("1.") ==
+               {:error, "expected integer fraction for significand", "", %{}, {1, 0}, 2}
+
+      assert parse_token("0e!") == {:error, "expected integer exponent", "!", %{}, {1, 0}, 2}
+      assert parse_token("0E+e") == {:error, "expected integer exponent", "+e", %{}, {1, 0}, 2}
+      assert parse_token("0E") == {:error, "expected integer exponent", "", %{}, {1, 0}, 2}
+    end
+
+    test "returns ok on literal" do
+      assert parse_token(~s("hi")) == {:ok, ["hi"], "", %{}, {1, 0}, 4}
+      assert parse_token(~s('hello')) == {:ok, ["hello"], "", %{}, {1, 0}, 7}
+      assert parse_token(~s("hi 'world'")) == {:ok, ["hi 'world'"], "", %{}, {1, 0}, 12}
+      assert parse_token(~s("hi \\"world\\"")) == {:ok, ["hi \"world\""], "", %{}, {1, 0}, 14}
+      assert parse_token(~s('hello \\'world\\'')) == {:ok, ["hello 'world'"], "", %{}, {1, 0}, 17}
+    end
+
+    test "returns error on invalid partial literal" do
+      assert parse_token(~s("hi)) ==
+               {:error, "expected literal delimiter ?\"", "", %{}, {1, 0}, 3}
+
+      assert parse_token(~s("hello) <> <<128>>) ==
+               {:error, "expected utf8 codepoint", <<128>>, %{}, {1, 0}, 6}
+    end
+
+    test "returns ok on identifier" do
+      assert parse_token("hi") == {:ok, [[:hi]], "", %{}, {1, 0}, 2}
+      assert parse_token("Hello") == {:ok, [[:Hello]], "", %{}, {1, 0}, 5}
+      assert parse_token("_hey") == {:ok, [[:_hey]], "", %{}, {1, 0}, 4}
+      assert parse_token("hello.world") == {:ok, [[:hello, :world]], "", %{}, {1, 0}, 11}
+    end
+
+    test "returns error on invalid partial identifier" do
+      assert parse_token("hi.0") ==
+               {:error, "expected alphabetic character or underscore to continue identifier", "0",
+                %{}, {1, 0}, 3}
+
+      assert parse_token("Hello.!") ==
+               {:error, "expected alphabetic character or underscore to continue identifier", "!",
+                %{}, {1, 0}, 6}
+    end
+
+    test "returns ok on whitespace" do
+      assert parse_token(" hi") == {:ok, [], "hi", %{}, {1, 0}, 1}
+      assert parse_token("\nhey") == {:ok, [], "hey", %{}, {2, 1}, 1}
+      assert parse_token("\thello") == {:ok, [], "hello", %{}, {1, 0}, 1}
+      assert parse_token("\vheya") == {:ok, [], "heya", %{}, {1, 0}, 1}
+      assert parse_token("\rhiya") == {:ok, [], "hiya", %{}, {1, 0}, 1}
+      assert parse_token("\fyo") == {:ok, [], "yo", %{}, {1, 0}, 1}
+    end
+  end
+end