189 changes: 189 additions & 0 deletions lib/thrift/parser/nimble.ex
@@ -0,0 +1,189 @@
defmodule Thrift.Parser.Nimble do
Collaborator:
Do you think we'll have multiple parsecs in here? Otherwise, just Thrift.Parser.Lexer (lib/thrift/parser/lexer.ex) seems better.
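For reference, a minimal sketch of that flattened shape, using NimbleParsec's module-body combinators so defparsec can see them without a nested module (the combinators shown are trimmed down and purely illustrative):

defmodule Thrift.Parser.Lexer do
  @moduledoc false
  import NimbleParsec

  # Combinators built as module-body values are in scope for defparsec below.
  whitespace =
    ascii_string([?\s, ?\t, ?\n, ?\v, ?\f, ?\r], min: 1)
    |> ignore()
    |> label("whitespace")

  punctuator =
    ~c"(){}[]<>,;=*"
    |> Enum.map(&(string(<<&1>>) |> replace(String.to_atom(<<&1>>))))
    |> choice()
    |> label("punctuator")

  defparsec(:parse_token, choice([punctuator, whitespace]))
end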

import NimbleParsec

defmodule Lexer do
@moduledoc false
@punctuator ~c"(){}[]<>,;=*"

@keywords ~w(
namespace include cpp_include
typedef enum union struct exception
void bool byte i8 i16 i32 i64 double string binary list map set
const oneway extends throws service required optional
true false
)

def token(combinator \\ empty()) do
choice(combinator, [
keyword(),
identifier(),
literal(),
number(),
punctuator(),
whitespace()
])
end

defp whitespace() do
ascii_string([?\s, ?\t, ?\n, ?\v, ?\f, ?\r], min: 1)
|> ignore()
|> label("whitespace")
end

defp keyword() do
@keywords
|> Enum.map(&(string(&1) |> replace(String.to_atom(&1))))
|> choice()
|> label("keyword")
end

defp punctuator() do
@punctuator
|> Enum.map(&(string(<<&1>>) |> replace(String.to_atom(<<&1>>))))
|> choice()
|> label("punctuator")
end

defp literal() do
choice([
literal_with(?"),
literal_with(?')
])
|> reduce({List, :to_string, []})
|> label("literal")
end

defp literal_with(char) do
Collaborator:
Perhaps quoted_literal(quote_char)?

delim = ascii_char([char])

delim
|> ignore()
|> concat(
choice([
utf8_char([?\\]) |> ignore() |> concat(delim),
Collaborator:
Does this handle embedded newlines, etc. like we support in the current lexer?

% Process a quoted string by stripping its surrounding quote characters and
% expanding any escape sequences (prefixed by a \). To keep things simple,
% we're very lenient in that we allow any character to be escaped, and if the
% character isn't "special" (like \n), we just return the unescaped character.
% It might be nicer in the future to report "bad" escape characters, but that
% would involve complicating this logic to allow a top-level {error, Reason}
% result that could be returned to leex above.
process_string(S,Len) -> process_chars(lists:sublist(S, 2, Len-2)).
process_chars([$\\,$n|Chars]) -> [$\n|process_chars(Chars)];
process_chars([$\\,$r|Chars]) -> [$\r|process_chars(Chars)];
process_chars([$\\,$t|Chars]) -> [$\t|process_chars(Chars)];
process_chars([$\\,C|Chars]) -> [C|process_chars(Chars)];
process_chars([C|Chars]) -> [C|process_chars(Chars)];
process_chars([]) -> [].
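
If escape expansion needs to match process_chars/1, one way to sketch it with NimbleParsec would be a dedicated branch tried ahead of the plain utf8_char([]) case (the escaped_char/0 name is just illustrative):

defp escaped_char() do
  ignore(ascii_char([?\\]))
  |> choice([
    ascii_char([?n]) |> replace(?\n),
    ascii_char([?r]) |> replace(?\r),
    ascii_char([?t]) |> replace(?\t),
    # Anything else escapes to itself, mirroring the lenient Erlang clauses.
    utf8_char([])
  ])
end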

utf8_char([]),
error(eos(), "expected literal delimiter ?#{[char]}"),
error(empty(), "expected utf8 codepoint")
])
|> repeat_until([delim])
)
|> ignore(delim)
end

defp number(combinator \\ empty()) do
combinator
|> choice([
ascii_char([?-, ?+])
|> choice([
unsigned_number(),
empty()
|> error("expected number")
Collaborator:
I think this reads a little better as (like you do above):

error(empty(), "expected number")

])
|> post_traverse({__MODULE__, :__sign__, []}),
unsigned_number()
])
|> label("number")
end

defp unsigned_number() do
choice([
hex(),
integer(min: 1)
|> choice([
ignore(ascii_char([?.]))
|> integer(min: 1)
|> optional(ignore(ascii_char([?E, ?e])) |> exponent()),
empty()
|> replace(0)
|> ignore(ascii_char([?E, ?e]))
|> exponent(),
empty()
])
])
|> optional(
ascii_char([?.])
|> ignore()
|> error(empty(), "expected integer fraction for significand")
)
|> post_traverse({__MODULE__, :__number__, []})
end

def __sign__(_rest, acc, context, _line, _offset) do
case acc do
[number, ?-] ->
{[-number], context}

[number, ?+] ->
{[number], context}
end
end

defp hex() do
string("0x")
|> ignore()
|> choice([
ascii_string([?0..?9, ?a..?f, ?A..?F], min: 1)
|> map({String, :to_integer, [16]}),
empty()
|> error("expected hexidecimal digit")
])
end

defp exponent(combinator) do
combinator
|> choice([
choice([
ascii_char([?-, ?+]),
empty() |> replace(?+)
])
|> integer(min: 1),
empty()
|> error("expected integer exponent")
])
end

def __number__(_rest, acc, context, _line, _offset) do
case acc do
[_int] ->
{acc, context}

[fraction, int] ->
{[String.to_float("#{int}.#{fraction}")], context}

[exponent, exponent_sign, fraction, int] ->
{[String.to_float("#{int}.#{fraction}e#{[exponent_sign]}#{exponent}")], context}
end
end

def identifier() do
ascii_char([?a..?z, ?A..?Z, ?_])
|> repeat(ascii_char([?a..?z, ?A..?Z, ?_, ?0..?9]))
|> reduce({List, :to_atom, []})
Collaborator:
Consider extracting these three lines into their own function because we repeat them below.
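One possible extraction (the identifier_part/0 name is only a suggestion):

defp identifier_part() do
  ascii_char([?a..?z, ?A..?Z, ?_])
  |> repeat(ascii_char([?a..?z, ?A..?Z, ?_, ?0..?9]))
  |> reduce({List, :to_atom, []})
end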

|> optional(
repeat(
ascii_char([?.])
|> ignore()
|> choice([
ascii_char([?a..?z, ?A..?Z, ?_])
|> repeat(ascii_char([?a..?z, ?A..?Z, ?_, ?0..?9]))
|> reduce({List, :to_atom, []}),
empty()
|> error("expected alphabetic character or underscore to continue identifier")
])
)
)
|> wrap()
|> label("identifier")
end

defp error(combinator \\ empty(), to_error, label) do
pre_traverse(combinator, to_error, {__MODULE__, :__error__, [label]})
end

def __error__(_rest, _acc, _context, _line, _offset, label) do
{:error, label}
end
end

defparsec(:parse_token, Lexer.token())
end
4 changes: 4 additions & 0 deletions mix.exs
@@ -83,6 +83,10 @@ defmodule Thrift.Mixfile do
{:credo, "~> 1.0", only: :dev, runtime: false},
{:dialyxir, "~> 0.5", only: :dev, runtime: false},

# Compile
{:nimble_parsec, "~> 0.4",
Collaborator:
0.5 was just released.
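Assuming 0.5 is on Hex, the entry could then drop the git source, roughly like this (override may still be needed if another dependency pins an older nimble_parsec):

{:nimble_parsec, "~> 0.5", runtime: false, override: true},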

github: "plataformatec/nimble_parsec", runtime: false, override: true},

# Runtime
{:connection, "~> 1.0"},
{:ranch, "~> 1.6"}
2 changes: 1 addition & 1 deletion mix.lock
@@ -16,7 +16,7 @@
"makeup_elixir": {:hex, :makeup_elixir, "0.8.0", "1204a2f5b4f181775a0e456154830524cf2207cf4f9112215c05e0b76e4eca8b", [:mix], [{:makeup, "~> 0.5.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 0.2.2", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"},
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
"mimerl": {:hex, :mimerl, "1.0.2", "993f9b0e084083405ed8252b99460c4f0563e41729ab42d9074fd5e52439be88", [:rebar3], [], "hexpm"},
"nimble_parsec": {:hex, :nimble_parsec, "0.2.2", "d526b23bdceb04c7ad15b33c57c4526bf5f50aaa70c7c141b4b4624555c68259", [:mix], [], "hexpm"},
"nimble_parsec": {:git, "https://github.com/plataformatec/nimble_parsec.git", "40c7d9b6049158911e456a51cee6b9b5543f1d0d", []},
"parse_trans": {:hex, :parse_trans, "3.2.0", "2adfa4daf80c14dc36f522cf190eb5c4ee3e28008fc6394397c16f62a26258c2", [:rebar3], [], "hexpm"},
"poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm"},
"ranch": {:hex, :ranch, "1.6.2", "6db93c78f411ee033dbb18ba8234c5574883acb9a75af0fb90a9b82ea46afa00", [:rebar3], [], "hexpm"},
93 changes: 93 additions & 0 deletions test/thrift/parser/nimble_test.exs
@@ -0,0 +1,93 @@
defmodule Thrift.Parser.NimbleTest do
use ExUnit.Case, async: true
import Thrift.Parser.Nimble, only: [parse_token: 1]

describe "parse_token/1" do
test "returns ok on integer" do
assert parse_token("111") == {:ok, [111], "", %{}, {1, 0}, 3}
assert parse_token("-111") == {:ok, [-111], "", %{}, {1, 0}, 4}
assert parse_token("+111") == {:ok, [111], "", %{}, {1, 0}, 4}
end

test "returns error on invalid partial number" do
assert parse_token("-A") == {:error, "expected number", "A", %{}, {1, 0}, 1}
assert parse_token("+") == {:error, "expected number", "", %{}, {1, 0}, 1}
end

test "returns ok on hex" do
assert parse_token("0x1F") == {:ok, [31], "", %{}, {1, 0}, 4}
assert parse_token("-0x1a2") == {:ok, [-418], "", %{}, {1, 0}, 6}
assert parse_token("+0x0FF0") == {:ok, [4080], "", %{}, {1, 0}, 7}
end

test "returns error on invalid partial hex" do
assert parse_token("0xG") == {:error, "expected hexidecimal digit", "G", %{}, {1, 0}, 2}
assert parse_token("0x") == {:error, "expected hexidecimal digit", "", %{}, {1, 0}, 2}
end

test "returns ok on double" do
assert parse_token("0.0") == {:ok, [0.0], "", %{}, {1, 0}, 3}
assert parse_token("-1.0") == {:ok, [-1.0], "", %{}, {1, 0}, 4}
assert parse_token("+1.0") == {:ok, [1.0], "", %{}, {1, 0}, 4}
assert parse_token("1e0") == {:ok, [1.0], "", %{}, {1, 0}, 3}
assert parse_token("-2E1") == {:ok, [-20.0], "", %{}, {1, 0}, 4}
assert parse_token("+3.2e1") == {:ok, [32.0], "", %{}, {1, 0}, 6}
assert parse_token("43.2E-1") == {:ok, [4.32], "", %{}, {1, 0}, 7}
assert parse_token("-5.432E+1") == {:ok, [-54.32], "", %{}, {1, 0}, 9}
end

test "returns error on invalid partial double" do
assert parse_token("0.a") ==
{:error, "expected integer fraction for significand", "a", %{}, {1, 0}, 2}

assert parse_token("1.") ==
{:error, "expected integer fraction for significand", "", %{}, {1, 0}, 2}

assert parse_token("0e!") == {:error, "expected integer exponent", "!", %{}, {1, 0}, 2}
assert parse_token("0E+e") == {:error, "expected integer exponent", "+e", %{}, {1, 0}, 2}
assert parse_token("0E") == {:error, "expected integer exponent", "", %{}, {1, 0}, 2}
end

test "returns ok on literal" do
assert parse_token(~s("hi")) == {:ok, ["hi"], "", %{}, {1, 0}, 4}
assert parse_token(~s('hello')) == {:ok, ["hello"], "", %{}, {1, 0}, 7}
assert parse_token(~s("hi 'world'")) == {:ok, ["hi 'world'"], "", %{}, {1, 0}, 12}
assert parse_token(~s("hi \\"world\\"")) == {:ok, ["hi \"world\""], "", %{}, {1, 0}, 14}
assert parse_token(~s('hello \\'world\\'')) == {:ok, ["hello 'world'"], "", %{}, {1, 0}, 17}
end

test "returns error on invalid partial literal" do
assert parse_token(~s("hi)) ==
{:error, "expected literal delimiter ?\"", "", %{}, {1, 0}, 3}

assert parse_token(~s("hello) <> <<128>>) ==
{:error, "expected utf8 codepoint", <<128>>, %{}, {1, 0}, 6}
end

test "returns ok on identifier" do
assert parse_token("hi") == {:ok, [[:hi]], "", %{}, {1, 0}, 2}
assert parse_token("Hello") == {:ok, [[:Hello]], "", %{}, {1, 0}, 5}
assert parse_token("_hey") == {:ok, [[:_hey]], "", %{}, {1, 0}, 4}
assert parse_token("hello.world") == {:ok, [[:hello, :world]], "", %{}, {1, 0}, 11}
end

test "returns error on invalid patial identifier" do
assert parse_token("hi.0") ==
{:error, "expected alphabetic character or underscore to continue identifier", "0",
%{}, {1, 0}, 3}

assert parse_token("Hello.!") ==
{:error, "expected alphabetic character or underscore to continue identifier", "!",
%{}, {1, 0}, 6}
end

test "returns ok on whitespace" do
assert parse_token(" hi") == {:ok, [], "hi", %{}, {1, 0}, 1}
assert parse_token("\nhey") == {:ok, [], "hey", %{}, {2, 1}, 1}
assert parse_token("\thello") == {:ok, [], "hello", %{}, {1, 0}, 1}
assert parse_token("\vheya") == {:ok, [], "heya", %{}, {1, 0}, 1}
assert parse_token("\rhiya") == {:ok, [], "hiya", %{}, {1, 0}, 1}
assert parse_token("\fyo") == {:ok, [], "yo", %{}, {1, 0}, 1}
end
end
end