30 changes: 20 additions & 10 deletions bashlex/tokenizer.py
@@ -55,12 +55,13 @@ class tokentype(enum.Enum):
     TIMEOPT = 22
     TIMEIGN = 23
     WORD = 24
-    ASSIGNMENT_WORD = 25
-    REDIR_WORD = 26
-    NUMBER = 27
-    ARITH_CMD = 28
-    ARITH_FOR_EXPRS = 29
-    COND_CMD = 30
+    LET = 25
+    ASSIGNMENT_WORD = 26
+    REDIR_WORD = 27
+    NUMBER = 28
+    ARITH_CMD = 29
+    ARITH_FOR_EXPRS = 30
+    COND_CMD = 31
     AND_AND = '&&'
     OR_OR = '||'
     GREATER_GREATER = '>>'
@@ -96,7 +97,7 @@ class tokentype(enum.Enum):
     tokentype.FI, tokentype.IF, tokentype.OR_OR, tokentype.SEMI_SEMI,
     tokentype.SEMI_AND, tokentype.SEMI_SEMI_AND, tokentype.THEN,
     tokentype.TIME, tokentype.TIMEOPT, tokentype.TIMEIGN, tokentype.COPROC,
-    tokentype.UNTIL, tokentype.WHILE])
+    tokentype.UNTIL, tokentype.WHILE, tokentype.LET])
 
 for c in '\n;()|&{}':
     _reserved.add(c)
@@ -124,7 +125,8 @@ class tokentype(enum.Enum):
"!" : tokentype.BANG,
"[[" : tokentype.COND_START,
"]]" : tokentype.COND_END,
"coproc" : tokentype.COPROC
"coproc" : tokentype.COPROC,
"let" : tokentype.LET
}

class MatchedPairError(errors.ParsingError):
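
These two hunks register "let" the way the other reserved words are handled: a dedicated token type plus an entry in word_token_alist. Note that inserting LET = 25 renumbers every enum member after it, which only matters if the numeric values are ever persisted. A minimal sketch of the table-lookup pattern (paraphrased for illustration; the tokenizer's real control flow lives in its word-reading code and differs in detail):

word_token_alist = {
    "coproc": "COPROC",
    "let": "LET",
}

def reserved_token_for(word):
    # Only a word in command position is checked against the table;
    # anything not found falls through to WORD/ASSIGNMENT_WORD handling.
    return word_token_alist.get(word)

print(reserved_token_for("let"))  # 'LET'
print(reserved_token_for("ls"))   # None -> plain WORD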
@@ -294,6 +296,11 @@ def _readtoken(self):
         character = self._getc(True)
         while character is not None and _shellblank(character):
             character = self._getc(True)
+        if character == '\\':
+            peek_char = self._getc(False)
+            if peek_char != '\n':
+                self._ungetc(peek_char)
+                peek_char = None
 
         if character is None:
             return eoftoken
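
The hunk above peeks one character ahead whenever the first non-blank character is a backslash, and pushes the peeked character back unless it is a newline. Together with the _getc change further down, this implements the shell rule that a backslash-newline pair joins two physical lines. A standalone sketch of that rule (illustrative only; the real tokenizer works against its own input-line state, not a plain string):

def drop_line_continuations(s):
    # Remove each backslash-newline pair, joining the physical lines.
    out = []
    i = 0
    while i < len(s):
        if s[i] == '\\' and i + 1 < len(s) and s[i + 1] == '\n':
            i += 2  # swallow both characters
            continue
        out.append(s[i])
        i += 1
    return ''.join(out)

print(drop_line_continuations('a \\\nb'))  # prints: a b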
@@ -303,6 +310,7 @@ def _readtoken(self):
             self._getc(False)
             character = '\n'
 
+
         self._recordpos(1)
 
         if character == '\n':
@@ -877,7 +885,7 @@ def handledollarword():
             assert False # pragma: no cover
 
         while count:
-            c = self._getc(doublequotes != "'" and not passnextchar)
+            c = self._getc(doublequotes != '"' and doublequotes != "'" and not passnextchar)
             if c is None:
                 raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
 
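
Before this fix, _getc was asked to strip quoted newlines inside double quotes as well (the old argument only excluded single quotes), so a backslash-newline inside "..." vanished from the token text and the token no longer matched its recorded source span. A sketch of checking span fidelity through the public API (assuming this branch is installed; node positions are half-open [start, end) offsets into the source string):

import bashlex

s = 'echo "foo\\\n"'
command = bashlex.parse(s)[0]
for part in command.parts:
    start, end = part.pos
    # Each part's span should slice back out of the source intact.
    print(repr(s[start:end]))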
@@ -1040,7 +1048,9 @@ def _getc(self, remove_quoted_newline=True):
             else:
                 c = None
 
-            if c == '\\' and remove_quoted_newline and self._shell_input_line[self._shell_input_line_index] == '\n':
+            if c == '\\' and remove_quoted_newline and self._shell_input_line_index < len(self._shell_input_line) \
+                    and self._shell_input_line[self._shell_input_line_index] == '\n':
                 self._shell_input_line_index += 1
                 self._line_number += 1
                 continue
             else:
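
The old condition subscripted _shell_input_line without first checking that the index was in range, so input whose buffer ends in a bare backslash could die with an IndexError rather than a proper parsing error. The added clause is the usual check-before-peek guard; its generic form (illustrative, not bashlex code):

def peek_is_newline(buf, i):
    # Safe lookahead: test the bound before subscripting.
    return i < len(buf) and buf[i] == '\n'

print(peek_is_newline('a\\', 2))     # False, instead of IndexError
print(peek_is_newline('a\\\nb', 2))  # True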
15 changes: 15 additions & 0 deletions tests/test-parser.py
@@ -126,6 +126,13 @@ def test_command(self):
                     wordnode('b'),
                     wordnode('c', '"c"')))
 
+        s = 'a \\\nb \\\n"c"'
+        self.assertASTEquals(s,
+                commandnode(s,
+                    wordnode('a'),
+                    wordnode('b'),
+                    wordnode('c', '"c"')))
+
         s = '2>/dev/null a b "c"'
         self.assertASTEquals(s,
                 commandnode(s,
@@ -847,6 +854,14 @@ def test_for(self):

     def test_assignments(self):
         # assignments must appear before the first word
+        # s = 'let a=b'
+        # self.assertASTEquals(s,
+        #         commandnode(s,
+        #             reservedwordnode('let', 'let'),
+        #             assignmentnode(s='a=b', word='a=b'),
+        #         )
+        # )
+
         s = 'a=b c e=d'
         self.assertASTEquals(s,
             commandnode(s,
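
The new parser test exercises the tokenizer change end to end: line continuations between words should be invisible in the resulting AST. A quick way to see the same thing through the public API (a sketch assuming this branch is installed):

import bashlex

flat = bashlex.parse('a b "c"')[0]
cont = bashlex.parse('a \\\nb \\\n"c"')[0]

# Same command words either way; only the source spans differ.
print([w.word for w in flat.parts])  # ['a', 'b', 'c']
print([w.word for w in cont.parts])  # ['a', 'b', 'c']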
54 changes: 54 additions & 0 deletions tests/test-tokenizer.py
@@ -70,10 +70,19 @@ def test_comment(self):
             t(tt.BAR, '|', [0, 1])])
 
     def test_shellquote(self):
+        s = '"foo\'"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\'"', [0, 6], set([flags.word.QUOTED]))])
+
         s = '"foo"'
         self.assertTokens(s, [
             t(tt.WORD, '"foo"', [0, 5], set([flags.word.QUOTED]))])
 
+
+        s = '"foo\n"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\n"', [0, 6], set([flags.word.QUOTED]))])
+
         s = '"foo"bar\'baz\''
         self.assertTokens(s, [
             t(tt.WORD, s, [0, len(s)], set([flags.word.QUOTED]))])
@@ -82,6 +91,28 @@ def test_shellquote(self):
                           tokenize,
                           "'a")
 
+        s = '"foo\\ \n"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\\ \n"', [0, 8], set([flags.word.QUOTED]))])
+
+        s = '"foo\\\n"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\\\n"', [0, 7], set([flags.word.QUOTED]))])
+
+
+        s = '"foo\\\'"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\\\'"', [0, 7], set([flags.word.QUOTED]))])
+
+        s = "'foo\"'"
+        self.assertTokens(s, [
+            t(tt.WORD, "'foo\"'", [0, 6], set([flags.word.QUOTED]))])
+
+        s = '"foo\'"'
+        self.assertTokens(s, [
+            t(tt.WORD, '"foo\'"', [0, 6], set([flags.word.QUOTED]))])
+
+
     def test_shellexp(self):
         s = '<(foo) bar $(baz) ${a}'
         self.assertTokens(s, [
@@ -243,6 +274,18 @@ def test_parsematchedpair(self):
# t(tt.WORD, '"\\a"', flags=set([flags.word.QUOTED]))])

def test_assignment(self):
s = 'let a=b'
self.assertTokens(s, [
t(tt.LET, 'let', [0, 3]),
t(tt.ASSIGNMENT_WORD, 'a=b', [4, 7],
flags=set([flags.word.NOSPLIT, flags.word.ASSIGNMENT]))])

s = 'let a+=b'
self.assertTokens(s, [
t(tt.LET, 'let', [0, 3]),
t(tt.ASSIGNMENT_WORD, 'a+=b', [4, 8],
flags=set([flags.word.NOSPLIT, flags.word.ASSIGNMENT]))])

s = 'a=b'
self.assertTokens(s, [
t(tt.ASSIGNMENT_WORD, 'a=b', [0, 3],
@@ -308,6 +351,17 @@ def test_escape_error(self):

         self.assertRaisesRegexp(errors.ParsingError, "No escaped character.*position 2", tokenize, s)
 
+    def test_line_continuation(self):
+        s = 'a \\\nb'
+        self.assertTokens(s, [
+            t(tt.WORD, 'a', [0, 1]),
+            t(tt.WORD, 'b', [5, 6])])
+
+        s = '\\\na \\\nb'
+        self.assertTokens(s, [
+            t(tt.WORD, 'a', [2, 3]),
+            t(tt.WORD, 'b', [7, 8])])
+
     def test_tokenize(self):
         s = 'bar -x'
         self.assertTokens(s, [