From b55a3519ceacacd22de83f48c85299ef66765f63 Mon Sep 17 00:00:00 2001 From: Shubham Ugare Date: Wed, 16 Jul 2025 10:08:37 -0500 Subject: [PATCH] Fix minor issues in Java Grammar --- README.md | 4 ++-- syncode/parsers/grammars/java.lark | 25 ++++++++++--------------- tests/parser/test_grammar_java.py | 6 +++++- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index d5597de1..f6b8d055 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Define your own grammar using simple EBNF syntax. Check out our [notebooks direc | 🔥 Fast grammar-guided generation (as little as 10% generation overhead with Python and Go!) | | 🤖 Seamlessly work with any HuggingFace Language Model, including Code, Chat, and Instruct models | | 🖍️ Pass in any CFG in the EBNF format (even large grammars for programming languages like Python and Go!) | -| 📝 Built-in CFGs for **Python, Go, SQL, Math, JSON**, and more! | +| 📝 Built-in CFGs for **Python, Go, Java, SQL, Math, JSON**, and more! | | 🎲 Sample with any existing decoding strategy (eg. greedy, beam search, nucleus sampling) | @@ -148,7 +148,7 @@ print(f"SynCode output:\n{output}") # } ``` -Check more examples of using Python, Go, and other grammars in Notebooks and a quick example at +Check more examples of using Python, Go, Java and other grammars in Notebooks and a quick example at   [](https://colab.research.google.com/drive/1rYm8iehx_qYdtgWmqLkmhIjizhUVTb9E?usp=sharing) #### Instuct-tuned Models diff --git a/syncode/parsers/grammars/java.lark b/syncode/parsers/grammars/java.lark index 6ae4f921..eb948a2f 100644 --- a/syncode/parsers/grammars/java.lark +++ b/syncode/parsers/grammars/java.lark @@ -1,21 +1,9 @@ start: compilation_unit -%import common.CNAME -%import common.DIGIT -%import common.WS - -%ignore WS - -LINE_COMMENT: /\/\/[^\n\r]*/ -BLOCK_COMMENT: /\/\*[\s\S]*?\*\// - type_parameters: "<" type_parameter ("," type_parameter)* ">" type_parameter: CNAME type_bound? 
type_bound: "extends" type ("&" type)* -%ignore LINE_COMMENT -%ignore BLOCK_COMMENT - compilation_unit: package_declaration? import_declarations? type_declarations? package_declaration: "package" name ";" @@ -188,7 +176,7 @@ floating_point_literal: DIGIT+ "." DIGIT+ boolean_literal: "true" | "false" -character_literal: "'" /[^\\'\n\r]/ "'" +character_literal: /'([^'\r\n\\]|\\([btnfr"'\\0-7]|[0-3]?[0-7]{2})|\\u[0-9a-fA-F]{4})'/ string_literal: /".*?"/ @@ -206,8 +194,6 @@ dim_expr: "[" expression "]" dims: "[" "]"+ - - field_access: primary "." CNAME | "super" "." CNAME method_invocation: name "(" argument_list? ")" | primary "." CNAME "(" argument_list? ")" | "super" "." CNAME "(" argument_list? ")" @@ -289,3 +275,12 @@ reference_type: class_or_interface_type | array_type array_type: primitive_type dims | name dims | array_type dims +LINE_COMMENT: /\/\/[^\n\r]*/ +BLOCK_COMMENT: /\/\*[\s\S]*?\*\// + +%import common.CNAME +%import common.DIGIT +%import common.WS +%ignore WS +%ignore LINE_COMMENT +%ignore BLOCK_COMMENT diff --git a/tests/parser/test_grammar_java.py b/tests/parser/test_grammar_java.py index 26c82912..e1c237cd 100644 --- a/tests/parser/test_grammar_java.py +++ b/tests/parser/test_grammar_java.py @@ -388,4 +388,8 @@ def test_java_parser26(self): public """ r = inc_parser.get_acceptable_next_terminals(partial_code) assert AcceptSequence(['CLASS']) in r.accept_sequences - assert r.remainder_state == RemainderState.COMPLETE \ No newline at end of file + assert r.remainder_state == RemainderState.COMPLETE + +if __name__ == '__main__': + unittest.main() + \ No newline at end of file