Skip to content

Commit c08eccf

Browse files
authored
Merge pull request #230 from Gompyn/patch-2
Refactor Go grammar to handle whitespace and newlines
2 parents 8390816 + ef17c4f commit c08eccf

File tree

1 file changed

+84
-83
lines changed

1 file changed

+84
-83
lines changed

syncode/parsers/grammars/go.lark

Lines changed: 84 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -23,188 +23,188 @@
2323
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2424
// POSSIBILITY OF SUCH DAMAGE.
2525

26-
start: package_clause eos (import_decl eos)* ((function_decl | method_decl | declaration) eos "eoc"?)*
26+
start: nls? package_clause eos (import_decl eos)* ((function_decl | method_decl | declaration) eos)*
2727

28-
package_clause: "package" NAME
28+
package_clause: "package" nls? NAME
2929

30-
import_decl: "import" (import_spec | "(" (import_spec eos)* ")")
30+
import_decl: "import" nls? (import_spec | "(" nls? ((import_spec eos)* import_spec eos?)? ")")
3131

32-
import_spec: ("." | NAME)? import_path
32+
import_spec: ("." nls? | NAME)? import_path
3333

3434
import_path: string_
3535

3636
declaration: const_decl | type_decl | var_decl
3737

38-
const_decl: "const" (const_spec | "(" (const_spec eos)* ")")
38+
const_decl: "const" nls? (const_spec | "(" nls? ((const_spec eos)* const_spec eos?)? ")")
3939

40-
const_spec: identifier_list (type_? "=" expression_list)?
40+
const_spec: (identifier_list | NAME) (type_? "=" nls? expression_list)?
4141

42-
identifier_list: NAME ("," NAME)*
42+
identifier_list: NAME ("," nls? NAME)+
4343

44-
expression_list: expression ("," expression)*
44+
expression_list: expression ("," nls? expression)*
4545

46-
type_decl: "type" (type_spec | "(" (type_spec eos)* ")")
46+
type_decl: "type" nls? (type_spec | "(" nls? ((type_spec eos)* type_spec eos?)? ")")
4747

4848
type_spec: alias_decl | type_def
4949

50-
alias_decl : NAME "=" type_
50+
alias_decl : NAME "=" nls? type_
5151

5252
type_def : NAME type_parameters? type_
5353

54-
type_parameters : "[" type_parameter_decl ("," type_parameter_decl)* "]"
54+
type_parameters : "[" nls? type_parameter_decl ("," nls? type_parameter_decl)* "]"
5555

56-
type_parameter_decl : identifier_list type_element
56+
type_parameter_decl : (identifier_list | NAME) type_element
5757

58-
type_element : type_term ("|" type_term)*
58+
type_element : type_term ("|" nls? type_term)*
5959

60-
type_term : "~"? type_
60+
type_term : ("~" nls?)? type_
6161

6262
// Function declarations
6363

64-
function_decl: "func" NAME type_parameters? signature ("{" statement_list? "}" ["eof"])?
64+
function_decl: "func" nls? NAME type_parameters? signature block?
6565
// eof: "}" // This indicates end of function body
6666

67-
method_decl: "func" receiver NAME signature block?
67+
method_decl: "func" nls? receiver NAME signature block?
6868

6969
receiver: parameters
7070

71-
var_decl: "var" (var_spec | "(" (var_spec eos)* ")")
71+
var_decl: "var" nls? (var_spec | "(" nls? ((var_spec eos)* var_spec eos?)? ")")
7272

73-
var_spec: identifier_list (type_ ("=" expression_list)? | "=" expression_list)
73+
var_spec: (identifier_list | NAME) (type_ ("=" nls? expression_list)? | "=" nls? expression_list)
7474

75-
block: "{" statement_list? "}"
75+
block: "{" nls? ((statement? eos)* statement? eos?)? "}"
7676

77-
statement_list: ((";"? | EOS?) statement eos)+
77+
// statement_list: (statement? eos)* statement
7878

7979
statement: declaration | labeled_stmt | simple_stmt | go_stmt | return_stmt | break_stmt | continue_stmt | goto_stmt | fallthrough_stmt | block | if_stmt | switch_stmt | select_stmt | for_stmt | defer_stmt
8080

8181
simple_stmt: send_stmt | inc_dec_stmt | assignment | expression | short_var_decl
8282

83-
send_stmt: expression "<-" expression
83+
send_stmt: expression "<-" nls? expression
8484

8585
inc_dec_stmt: expression ("++" | "--")
8686

87-
assignment: expression assign_op expression | expression_list "=" expression_list
87+
assignment: expression assign_op nls? expression | expression_list "=" nls? expression_list
8888

8989
assign_op: "+=" | "-=" | "|=" | "^=" | "*=" | "/=" | "%=" | "<<=" | ">>=" | "&=" | "&^="
9090

91-
short_var_decl: expression_list ":=" expression_list
91+
short_var_decl: expression_list ":=" nls? expression_list
9292

93-
labeled_stmt: NAME ":" statement?
93+
labeled_stmt: NAME ":"
9494

9595
return_stmt: "return" expression_list?
9696

9797
break_stmt: "break" NAME?
9898

9999
continue_stmt: "continue" NAME?
100100

101-
goto_stmt: "goto" NAME
101+
goto_stmt: "goto" nls? NAME
102102

103103
fallthrough_stmt: "fallthrough"
104104

105-
defer_stmt: "defer" expression
105+
defer_stmt: "defer" nls? expression
106106

107-
if_stmt: "if" ( expression | eos expression | simple_stmt eos expression) block ("else" (if_stmt | block))?
107+
if_stmt: "if" nls? (simple_stmt? eos)? expression block ("else" nls? (if_stmt | block))?
108108

109109
switch_stmt: expr_switch_stmt | type_switch_stmt
110110

111-
expr_switch_stmt: "switch" (expression? | simple_stmt? eos expression?) "{" expr_case_clause* "}"
111+
expr_switch_stmt: "switch" nls? (simple_stmt? eos)? expression? "{" nls? ((expr_case_clause | statement? eos)* (expr_case_clause | statement? eos?))? "}"
112112

113-
expr_case_clause: expr_switch_case ":" statement_list?
113+
expr_case_clause: expr_switch_case ":" nls?
114114

115-
expr_switch_case: "case" expression_list | "default"
115+
expr_switch_case: "case" nls? expression_list | "default" nls?
116116

117-
type_switch_stmt: "switch" ( type_switch_guard | eos type_switch_guard | simple_stmt eos type_switch_guard) "{" type_case_clause* "}"
117+
type_switch_stmt: "switch" nls? (simple_stmt? eos)? type_switch_guard "{" nls? ((type_case_clause | statement? eos)* (type_case_clause | statement? eos?))? "}"
118118

119119
// type_switch_guard: (NAME ":=")? primary_expr "." "(" "type" ")"
120-
type_switch_guard: (NAME ":=")? NAME "." "(" "type" ")"
120+
type_switch_guard: (NAME ":=" nls?)? NAME "." nls? "(" nls? "type" nls? ")"
121121

122-
type_case_clause: type_switch_case ":" statement_list?
122+
type_case_clause: type_switch_case ":" nls?
123123

124-
type_switch_case: "case" type_list | "default"
124+
type_switch_case: "case" nls? type_list | "default" nls?
125125

126-
type_list: (type_ | "nil" ) ("," (type_ | "nil" ))*
126+
type_list: type_ ("," nls? type_)*
127127

128-
select_stmt: "select" "{" comm_clause* "}"
128+
select_stmt: "select" nls? "{" nls? ((comm_clause | statement? eos)* (comm_clause | statement? eos?))? "}"
129129

130-
comm_clause: comm_case ":" statement_list?
130+
comm_clause: comm_case ":" nls?
131131

132-
comm_case: "case" (send_stmt | recv_stmt) | "default"
132+
comm_case: "case" nls? (send_stmt | recv_stmt) | "default" nls?
133133

134-
recv_stmt: (expression_list "=" | identifier_list ":=")? expression
134+
recv_stmt: (expression_list "=" nls? | (identifier_list | NAME) ":=" nls?)? expression
135135

136-
for_stmt: "for" [for_clause] block
136+
for_stmt: "for" nls? [for_clause] block
137137

138138
for_clause: simple_stmt (eos expression eos simple_stmt)? | range_clause
139139

140-
range_clause: (expression_list "=" | expression_list ":=") "range" expression
140+
range_clause: (expression_list "=" nls? | expression_list ":=" nls?) "range" nls? expression
141141

142-
go_stmt: "go"expression
142+
go_stmt: "go" nls? expression
143143

144-
type_: literal_type | var_or_type_name type_args? | "(" type_ ")"
144+
type_: literal_type | (var_or_type_name | NAME) type_args? | "(" nls? type_ ")"
145145

146146
// type_lit: array_type | struct_type | pointer_type | function_type | interface_type | slice_type | map_type | channel_type
147147

148148
type_args : "--"
149-
// type_args: "[" type_list ","? "]" // This is useful for Golang gen
149+
// type_args: "[" nls? type_list ("," nls?)? "]" // This is useful for Golang gen
150150

151-
var_or_type_name: NAME "." NAME | NAME | NAME "." "(" type_ ")"
151+
var_or_type_name: NAME "." nls? NAME | NAME "." nls? "(" nls? type_ ")"
152152

153-
array_type: "[" array_length "]" element_type
153+
array_type: "[" nls? array_length "]" element_type
154154

155155
array_length: expression
156156

157157
element_type: type_
158158

159-
pointer_type: "*" type_
159+
pointer_type: "*" nls? type_
160160

161-
interface_type: "interface" "{" ((method_spec | type_element ) eos)* "}"
161+
interface_type: "interface" nls? "{" nls? (((method_spec | type_element ) eos)* (method_spec | type_element ) eos?)? "}"
162162

163-
slice_type: "[" "]" element_type
163+
slice_type: "[" nls? "]" element_type
164164

165165
// It's possible to replace `type_` with a more restricted type_lit list; also pay attention to nil maps
166-
map_type: "map" "[" type_ "]" element_type
166+
map_type: "map" nls? "[" nls? type_ "]" element_type
167167

168-
channel_type: ("chan" | "chan" "<-" | "<-" "chan" ) element_type
168+
channel_type: ("chan" | "chan" nls? "<-" | "<-" nls? "chan" ) nls? element_type
169169

170-
method_spec: NAME parameters result | NAME parameters
170+
method_spec: NAME signature
171171

172-
function_type: "func" signature
172+
function_type: "func" nls? signature
173173

174174
signature: parameters result?
175175

176176
result: parameters | type_
177177

178-
parameters: "(" parameter_decl ("," parameter_decl)* ","? ")" | "(" ")"
178+
parameters: "(" nls? parameter_decl ("," nls? parameter_decl)* ("," nls?)? ")" | "(" nls? ")"
179179

180180
// a comma-separated list of either (a) name, (b) type, or (c) name and type
181181
// https://groups.google.com/g/golang-nuts/c/jVjbH2-emMQ/m/UdZlSNhd3DwJ
182182
// parameter_decl: identifier_list? "..."? type_
183183
// parameter_decl: (NAME | "..."? type_ | NAME type_)
184184

185185
// Although the following is an over-approximation, it's an easy way to avoid reduce/reduce conflicts
186-
parameter_decl: (type_ | "..."? type_ | NAME type_)
186+
parameter_decl: (type_ | ("..." nls?)? type_ | NAME type_)
187187

188188

189189
expression: primary_expr
190-
| ("+" | "-" | "!" | "^" | "*" | "&" | "<-") expression
191-
| expression ("*" | "/" | "%" | "<<" | ">>" | "&" | "&^") expression
192-
| expression ("+" | "-" | "|" | "^") expression
193-
| expression ("==" | "!=" | "<" | "<=" | ">" | ">=") expression
194-
| expression "&&" expression
195-
| expression "||" expression
190+
| ("+" | "-" | "!" | "^" | "*" | "&" | "<-") nls? expression
191+
| expression ("*" | "/" | "%" | "<<" | ">>" | "&" | "&^") nls? expression
192+
| expression ("+" | "-" | "|" | "^") nls? expression
193+
| expression ("==" | "!=" | "<" | "<=" | ">" | ">=") nls? expression
194+
| expression "&&" nls? expression
195+
| expression "||" nls? expression
196196

197-
primary_expr: operand | primary_expr ("." (NAME | "(" type_ ")") | index | slice_ | arguments) | type_
197+
primary_expr: operand | primary_expr ("." nls? (NAME | "(" nls? type_ ")") | index | slice_ | arguments)
198198

199199
// conversion is not needed since a method call already includes this syntax
200200
// conversion: type_ "(" expression ","? ")"
201201

202202
// Giving operand higher precedence than type_ is a hack to avoid reduce/reduce conflicts
203-
operand.3: literal | NAME | "(" expression ")" // removed NAME type_args?
203+
operand: literal | type_ | "(" expression ")" // removed NAME type_args?
204204

205205
literal: basic_lit | composite_lit | function_lit
206206

207-
basic_lit: "nil" | integer | string_ | FLOAT_LIT | CHAR_LIT
207+
basic_lit: integer | string_ | FLOAT_LIT | CHAR_LIT
208208

209209
integer: DECIMAL_LIT | BINARY_LIT | OCTAL_LIT | HEX_LIT
210210
// integer: DECIMAL_LIT | BINARY_LIT | OCTAL_LIT | HEX_LIT | IMAGINARY_LIT | RUNE_LIT
@@ -218,55 +218,56 @@ CHAR_LIT: /'/ (/[^'\\]/ | ESCAPED_VALUE) /'/
218218

219219
composite_lit: literal_type literal_value
220220

221-
literal_type: struct_type | array_type | "[" "..." "]" element_type | slice_type | map_type | "interface" "{" "}"
221+
literal_type: struct_type | array_type | "[" nls? "..." nls? "]" element_type | slice_type | map_type | "interface" nls? "{" nls? "}"
222222

223-
literal_value: "{" (element_list ","?)? "}"
223+
literal_value: "{" nls? (element_list ("," nls?)?)? "}"
224224

225-
element_list: keyed_element ("," keyed_element)*
225+
element_list: keyed_element ("," nls? keyed_element)*
226226

227-
keyed_element: (key ":")? element
227+
keyed_element: (key ":" nls?)? element
228228

229229
key: expression | literal_value
230230

231231
element: expression | literal_value
232232

233-
struct_type: "struct" "{" (field_decl eos)* "}"
233+
struct_type: "struct" nls? "{" nls? ((field_decl eos)* field_decl eos?)? "}"
234234

235-
field_decl: (identifier_list type_ | embedded_field) string_?
235+
field_decl: ((identifier_list | NAME) type_ | embedded_field) string_?
236236

237237
string_: RAW_STRING_LIT | INTERPRETED_STRING_LIT
238238

239239
// RAW_STRING_LIT : '`' ~'`'* '`' -> mode(NLSEMI);
240240
// INTERPRETED_STRING_LIT : '"' (~["\\] | ESCAPED_VALUE)* '"' -> mode(NLSEMI);
241241

242-
RAW_STRING_LIT: /`.*?`/s
242+
RAW_STRING_LIT: /`[^`]*`/s
243243
INTERPRETED_STRING_LIT: /"/ (/[^"\\]/ | ESCAPED_VALUE)* /"/
244244

245245
ESCAPED_VALUE: /\\(u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|[abfnrtv\\'"]|[0-7]{3}|x[0-9a-fA-F]{2})/
246246

247-
embedded_field: "*"? (NAME "." NAME | NAME) type_args?
247+
embedded_field: ("*" nls?)? (NAME "." nls? NAME | NAME) type_args?
248248

249-
function_lit: "func" signature block // function
249+
function_lit: "func" nls? signature block // function
250250

251-
index: "[" expression "]"
251+
index: "[" nls? expression ("," nls?)? "]"
252252

253-
slice_: "[" ( expression? ":" expression? | expression? ":" expression ":" expression) "]"
253+
slice_: "[" nls? ( expression? ":" nls? expression? | expression? ":" nls? expression ":" nls? expression) "]"
254254

255-
type_assertion: "." "(" type_ ")"
255+
type_assertion: "." nls? "(" nls? type_ ")"
256256

257257
// arguments: "(" ( (expression_list | type_ ("," expression_list)?) "..."? ","?)? ")"
258-
arguments: "(" ( expression_list? "..."? ","?)? ")"
258+
arguments: "(" nls? ( expression_list? ("..." nls?)? ("," nls?)?)? ")"
259259
// method_expr: type_ "." NAME
260260

261-
eos: ";" | EOS // | {this.closingBracket()}?
261+
eos: semi | nls
262+
semi: ";" NL*
263+
nls: NL+
262264

263265
NAME : /[a-zA-Z_]\w*/
264-
EOS: _NL | ";" | /\/\*.*?\*\//s
265266

266-
COMMENT : /\/\/[^\n]*/
267-
_NL: ( /(\r?\n[\t ]*)+/ | COMMENT)+
267+
COMMENT : /\/\/[^\n]*\n/
268+
NL: COMMENT | /(\r?\n[\t ]*)+/ | /\/\*[^\n]*\n.*?\*\//s
268269

269270
// %import common.WS_INLINE
270271
// %ignore WS_INLINE
271-
%ignore /[\t ]/
272-
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
272+
IGNORED: /[\t ]/ | /\/\*[^\n]*?\*\//
273+
%ignore IGNORED

0 commit comments

Comments
 (0)