30 changes: 20 additions & 10 deletions bashlex/tokenizer.py
@@ -55,12 +55,13 @@ class tokentype(enum.Enum):
     TIMEOPT = 22
     TIMEIGN = 23
     WORD = 24
-    ASSIGNMENT_WORD = 25
-    REDIR_WORD = 26
-    NUMBER = 27
-    ARITH_CMD = 28
-    ARITH_FOR_EXPRS = 29
-    COND_CMD = 30
+    LET = 25
+    ASSIGNMENT_WORD = 26
+    REDIR_WORD = 27
+    NUMBER = 28
+    ARITH_CMD = 29
+    ARITH_FOR_EXPRS = 30
+    COND_CMD = 31
     AND_AND = '&&'
     OR_OR = '||'
     GREATER_GREATER = '>>'
@@ -96,7 +97,7 @@ class tokentype(enum.Enum):
     tokentype.FI, tokentype.IF, tokentype.OR_OR, tokentype.SEMI_SEMI,
     tokentype.SEMI_AND, tokentype.SEMI_SEMI_AND, tokentype.THEN,
     tokentype.TIME, tokentype.TIMEOPT, tokentype.TIMEIGN, tokentype.COPROC,
-    tokentype.UNTIL, tokentype.WHILE])
+    tokentype.UNTIL, tokentype.WHILE, tokentype.LET])
 
 for c in '\n;()|&{}':
     _reserved.add(c)
@@ -124,7 +125,8 @@ class tokentype(enum.Enum):
"!" : tokentype.BANG,
"[[" : tokentype.COND_START,
"]]" : tokentype.COND_END,
"coproc" : tokentype.COPROC
"coproc" : tokentype.COPROC,
"let" : tokentype.LET
}

class MatchedPairError(errors.ParsingError):
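
These two hunks register "let" the way the other reserved words are handled: a dedicated token type plus an entry in word_token_alist. Note that inserting LET = 25 renumbers every enum member after it, which only matters if the numeric values are ever persisted. A minimal sketch of the table-lookup pattern (paraphrased for illustration; the tokenizer's real control flow lives in its word-reading code and differs in detail):

word_token_alist = {
    "coproc": "COPROC",
    "let": "LET",
}

def reserved_token_for(word):
    # Only a word in command position is checked against the table;
    # anything not found falls through to WORD/ASSIGNMENT_WORD handling.
    return word_token_alist.get(word)

print(reserved_token_for("let"))  # 'LET'
print(reserved_token_for("ls"))   # None -> plain WORD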
@@ -294,6 +296,11 @@ def _readtoken(self):
         character = self._getc(True)
         while character is not None and _shellblank(character):
             character = self._getc(True)
+        if character == '\\':
+            peek_char = self._getc(False)
+            if peek_char != '\n':
+                self._ungetc(peek_char)
+                peek_char = None
 
         if character is None:
             return eoftoken
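
The hunk above peeks one character ahead whenever the first non-blank character is a backslash, and pushes the peeked character back unless it is a newline. Together with the _getc change further down, this implements the shell rule that a backslash-newline pair joins two physical lines. A standalone sketch of that rule (illustrative only; the real tokenizer works against its own input-line state, not a plain string):

def drop_line_continuations(s):
    # Remove each backslash-newline pair, joining the physical lines.
    out = []
    i = 0
    while i < len(s):
        if s[i] == '\\' and i + 1 < len(s) and s[i + 1] == '\n':
            i += 2  # swallow both characters
            continue
        out.append(s[i])
        i += 1
    return ''.join(out)

print(drop_line_continuations('a \\\nb'))  # prints: a b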
@@ -303,6 +310,7 @@ def _readtoken(self):
             self._getc(False)
             character = '\n'
 
+
         self._recordpos(1)
 
         if character == '\n':
@@ -877,7 +885,7 @@ def handledollarword():
             assert False # pragma: no cover
 
         while count:
-            c = self._getc(doublequotes != "'" and not passnextchar)
+            c = self._getc(doublequotes != '"' and doublequotes != "'" and not passnextchar)
             if c is None:
                 raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
 
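
Before this fix, _getc was asked to strip quoted newlines inside double quotes as well (the old argument only excluded single quotes), so a backslash-newline inside "..." vanished from the token text and the token no longer matched its recorded source span. A sketch of checking span fidelity through the public API (assuming this branch is installed; node positions are half-open [start, end) offsets into the source string):

import bashlex

s = 'echo "foo\\\n"'
command = bashlex.parse(s)[0]
for part in command.parts:
    start, end = part.pos
    # Each part's span should slice back out of the source intact.
    print(repr(s[start:end]))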
@@ -1040,7 +1048,9 @@ def _getc(self, remove_quoted_newline=True):
             else:
                 c = None
 
-            if c == '\\' and remove_quoted_newline and self._shell_input_line[self._shell_input_line_index] == '\n':
+            if c == '\\' and remove_quoted_newline and self._shell_input_line_index < len(self._shell_input_line) \
+                    and self._shell_input_line[self._shell_input_line_index] == '\n':
                 self._shell_input_line_index += 1
                 self._line_number += 1
                 continue
             else:
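
The old condition subscripted _shell_input_line without first checking that the index was in range, so input whose buffer ends in a bare backslash could die with an IndexError rather than a proper parsing error. The added clause is the usual check-before-peek guard; its generic form (illustrative, not bashlex code):

def peek_is_newline(buf, i):
    # Safe lookahead: test the bound before subscripting.
    return i < len(buf) and buf[i] == '\n'

print(peek_is_newline('a\\', 2))     # False, instead of IndexError
print(peek_is_newline('a\\\nb', 2))  # True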
15 changes: 15 additions & 0 deletions tests/test-parser.py
@@ -126,6 +126,13 @@ def test_command(self):
                     wordnode('b'),
                     wordnode('c', '"c"')))
 
+        s = 'a \\\nb \\\n"c"'
+        self.assertASTEquals(s,
+                commandnode(s,
+                    wordnode('a'),
+                    wordnode('b'),
+                    wordnode('c', '"c"')))
+
         s = '2>/dev/null a b "c"'
         self.assertASTEquals(s,
                 commandnode(s,
@@ -847,6 +854,14 @@ def test_for(self):

     def test_assignments(self):
         # assignments must appear before the first word
+        # s = 'let a=b'
+        # self.assertASTEquals(s,
+        #         commandnode(s,
+        #             reservedwordnode('let', 'let'),
+        #             assignmentnode(s='a=b', word='a=b'),
+        #         )
+        # )
+
         s = 'a=b c e=d'
         self.assertASTEquals(s,
             commandnode(s,
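
The new parser test exercises the tokenizer change end to end: line continuations between words should be invisible in the resulting AST. A quick way to see the same thing through the public API (a sketch assuming this branch is installed):

import bashlex

flat = bashlex.parse('a b "c"')[0]
cont = bashlex.parse('a \\\nb \\\n"c"')[0]

# Same command words either way; only the source spans differ.
print([w.word for w in flat.parts])  # ['a', 'b', 'c']
print([w.word for w in cont.parts])  # ['a', 'b', 'c']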
54 changes: 54 additions & 0 deletions tests/test-tokenizer.py
@@ -70,10 +70,19 @@ def test_comment(self):
             t(tt.BAR, '|', [0, 1])])
 
     def test_shellquote(self):
+        s = '"foo\'"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\'"', [0, 6], set([flags.word.QUOTED]))])
+
         s = '"foo"'
         self.assertTokens(s, [
             t(tt.WORD, '"foo"', [0, 5], set([flags.word.QUOTED]))])
 
+
+        s = '"foo\n"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\n"', [0, 6], set([flags.word.QUOTED]))])
+
         s = '"foo"bar\'baz\''
         self.assertTokens(s, [
             t(tt.WORD, s, [0, len(s)], set([flags.word.QUOTED]))])
@@ -82,6 +91,28 @@ def test_shellquote(self):
                           tokenize,
                           "'a")
 
+        s = '"foo\\ \n"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\\ \n"', [0, 8], set([flags.word.QUOTED]))])
+
+        s = '"foo\\\n"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\\\n"', [0, 7], set([flags.word.QUOTED]))])
+
+
+        s = '"foo\\\'"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\\\'"', [0, 7], set([flags.word.QUOTED]))])
+
+        s = "'foo\"'"
+        self.assertTokens(s, [
+            t(tt.WORD, "'foo\"'", [0, 6], set([flags.word.QUOTED]))])
+
+        s = '"foo\'"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\'"', [0, 6], set([flags.word.QUOTED]))])
+
+
     def test_shellexp(self):
         s = '<(foo) bar $(baz) ${a}'
         self.assertTokens(s, [
@@ -243,6 +274,18 @@ def test_parsematchedpair(self):
# t(tt.WORD, '"\\a"', flags=set([flags.word.QUOTED]))])

def test_assignment(self):
s = 'let a=b'
self.assertTokens(s, [
t(tt.LET, 'let', [0, 3]),
t(tt.ASSIGNMENT_WORD, 'a=b', [4, 7],
flags=set([flags.word.NOSPLIT, flags.word.ASSIGNMENT]))])

s = 'let a+=b'
self.assertTokens(s, [
t(tt.LET, 'let', [0, 3]),
t(tt.ASSIGNMENT_WORD, 'a+=b', [4, 8],
flags=set([flags.word.NOSPLIT, flags.word.ASSIGNMENT]))])

s = 'a=b'
self.assertTokens(s, [
t(tt.ASSIGNMENT_WORD, 'a=b', [0, 3],
@@ -308,6 +351,17 @@ def test_escape_error(self):

         self.assertRaisesRegexp(errors.ParsingError, "No escaped character.*position 2", tokenize, s)
 
+    def test_line_continuation(self):
+        s = 'a \\\nb'
+        self.assertTokens(s, [
+            t(tt.WORD, 'a', [0, 1]),
+            t(tt.WORD, 'b', [5, 6])])
+
+        s = '\\\na \\\nb'
+        self.assertTokens(s, [
+            t(tt.WORD, 'a', [2, 3]),
+            t(tt.WORD, 'b', [7, 8])])
+
     def test_tokenize(self):
         s = 'bar -x'
         self.assertTokens(s, [