diff --git a/bashlex/tokenizer.py b/bashlex/tokenizer.py
index 46ed3980..4717e5ee 100644
--- a/bashlex/tokenizer.py
+++ b/bashlex/tokenizer.py
@@ -55,12 +55,13 @@ class tokentype(enum.Enum):
     TIMEOPT = 22
     TIMEIGN = 23
     WORD = 24
-    ASSIGNMENT_WORD = 25
-    REDIR_WORD = 26
-    NUMBER = 27
-    ARITH_CMD = 28
-    ARITH_FOR_EXPRS = 29
-    COND_CMD = 30
+    LET = 25
+    ASSIGNMENT_WORD = 26
+    REDIR_WORD = 27
+    NUMBER = 28
+    ARITH_CMD = 29
+    ARITH_FOR_EXPRS = 30
+    COND_CMD = 31
     AND_AND = '&&'
     OR_OR = '||'
     GREATER_GREATER = '>>'
@@ -96,7 +97,7 @@ class tokentype(enum.Enum):
     tokentype.FI, tokentype.IF, tokentype.OR_OR, tokentype.SEMI_SEMI,
     tokentype.SEMI_AND, tokentype.SEMI_SEMI_AND, tokentype.THEN, tokentype.TIME,
     tokentype.TIMEOPT, tokentype.TIMEIGN, tokentype.COPROC,
-    tokentype.UNTIL, tokentype.WHILE])
+    tokentype.UNTIL, tokentype.WHILE, tokentype.LET])
 
 for c in '\n;()|&{}':
     _reserved.add(c)
@@ -124,7 +125,8 @@ class tokentype(enum.Enum):
     "!" : tokentype.BANG,
     "[[" : tokentype.COND_START,
     "]]" : tokentype.COND_END,
-    "coproc" : tokentype.COPROC
+    "coproc" : tokentype.COPROC,
+    "let" : tokentype.LET
     }
 
 class MatchedPairError(errors.ParsingError):
@@ -294,6 +296,11 @@ def _readtoken(self):
         character = self._getc(True)
         while character is not None and _shellblank(character):
             character = self._getc(True)
+        if character == '\\':
+            peek_char = self._getc(False)
+            if peek_char != '\n':
+                self._ungetc(peek_char)
+            peek_char = None
 
         if character is None:
             return eoftoken
@@ -303,6 +310,7 @@ def _readtoken(self):
             self._getc(False)
             character = '\n'
 
+        self._recordpos(1)
 
 
         if character == '\n':
@@ -877,7 +885,7 @@ def handledollarword():
             assert False # pragma: no cover
 
         while count:
-            c = self._getc(doublequotes != "'" and not passnextchar)
+            c = self._getc(doublequotes != '"' and doublequotes != "'" and not passnextchar)
             if c is None:
                 raise MatchedPairError(startlineno,
                         'unexpected EOF while looking for matching %r' % close, self)
@@ -1040,7 +1048,9 @@ def _getc(self, remove_quoted_newline=True):
             else:
                 c = None
 
-            if c == '\\' and remove_quoted_newline and self._shell_input_line[self._shell_input_line_index] == '\n':
+            if c == '\\' and remove_quoted_newline and self._shell_input_line_index < len(self._shell_input_line) \
+                    and self._shell_input_line[self._shell_input_line_index] == '\n':
+                self._shell_input_line_index += 1
                 self._line_number += 1
                 continue
             else:
diff --git a/tests/test-parser.py b/tests/test-parser.py
index 3a6ce180..dba68615 100644
--- a/tests/test-parser.py
+++ b/tests/test-parser.py
@@ -126,6 +126,13 @@ def test_command(self):
                 wordnode('b'),
                 wordnode('c', '"c"')))
 
+        s = 'a \\\nb \\\n"c"'
+        self.assertASTEquals(s,
+                commandnode(s,
+                    wordnode('a'),
+                    wordnode('b'),
+                    wordnode('c', '"c"')))
+
         s = '2>/dev/null a b "c"'
         self.assertASTEquals(s,
             commandnode(s,
@@ -847,6 +854,14 @@ def test_for(self):
 
     def test_assignments(self):
         # assignments must appear before the first word
+#        s = 'let a=b'
+#        self.assertASTEquals(s,
+#                commandnode(s,
+#                    reservedwordnode('let', 'let'),
+#                    assignmentnode(s='a=b', word='a=b'),
+#                )
+#            )
+
         s = 'a=b c e=d'
         self.assertASTEquals(s,
             commandnode(s,
diff --git a/tests/test-tokenizer.py b/tests/test-tokenizer.py
index de70f18d..52d28822 100644
--- a/tests/test-tokenizer.py
+++ b/tests/test-tokenizer.py
@@ -70,10 +70,19 @@ def test_comment(self):
             t(tt.BAR, '|', [0, 1])])
 
 
     def test_shellquote(self):
+        s = '"foo\'"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\'"', [0, 6], set([flags.word.QUOTED]))])
+
         s = '"foo"'
         self.assertTokens(s, [
             t(tt.WORD, '"foo"', [0, 5], set([flags.word.QUOTED]))])
+
+        s = '"foo\n"'
'"foo\n"' + self.assertTokens(s, [ + t(tt.WORD, '"foo\n"', [0, 6], set([flags.word.QUOTED]))]) + s = '"foo"bar\'baz\'' self.assertTokens(s, [ t(tt.WORD, s, [0, len(s)], set([flags.word.QUOTED]))]) @@ -82,6 +91,28 @@ def test_shellquote(self): tokenize, "'a") + s = '"foo\\ \n"' + self.assertTokens(s, [ + t(tt.WORD, '"foo\\ \n"', [0, 8], set([flags.word.QUOTED]))]) + + s = '"foo\\\n"' + self.assertTokens(s, [ + t(tt.WORD, '"foo\\\n"', [0, 7], set([flags.word.QUOTED]))]) + + + s = '"foo\\\'"' + self.assertTokens(s, [ + t(tt.WORD, '"foo\\\'"', [0, 7], set([flags.word.QUOTED]))]) + + s = "'foo\"'" + self.assertTokens(s, [ + t(tt.WORD, "'foo\"'", [0, 6], set([flags.word.QUOTED]))]) + + s = '"foo\'"' + self.assertTokens(s, [ + t(tt.WORD, '"foo\'"', [0, 6], set([flags.word.QUOTED]))]) + + def test_shellexp(self): s = '<(foo) bar $(baz) ${a}' self.assertTokens(s, [ @@ -243,6 +274,18 @@ def test_parsematchedpair(self): # t(tt.WORD, '"\\a"', flags=set([flags.word.QUOTED]))]) def test_assignment(self): + s = 'let a=b' + self.assertTokens(s, [ + t(tt.LET, 'let', [0, 3]), + t(tt.ASSIGNMENT_WORD, 'a=b', [4, 7], + flags=set([flags.word.NOSPLIT, flags.word.ASSIGNMENT]))]) + + s = 'let a+=b' + self.assertTokens(s, [ + t(tt.LET, 'let', [0, 3]), + t(tt.ASSIGNMENT_WORD, 'a+=b', [4, 8], + flags=set([flags.word.NOSPLIT, flags.word.ASSIGNMENT]))]) + s = 'a=b' self.assertTokens(s, [ t(tt.ASSIGNMENT_WORD, 'a=b', [0, 3], @@ -308,6 +351,17 @@ def test_escape_error(self): self.assertRaisesRegexp(errors.ParsingError, "No escaped character.*position 2", tokenize, s) + def test_line_continuation(self): + s = 'a \\\nb' + self.assertTokens(s, [ + t(tt.WORD, 'a', [0, 1]), + t(tt.WORD, 'b', [5, 6])]) + + s = '\\\na \\\nb' + self.assertTokens(s, [ + t(tt.WORD, 'a', [2, 3]), + t(tt.WORD, 'b', [7, 8])]) + def test_tokenize(self): s = 'bar -x' self.assertTokens(s, [