diff --git a/README.md b/README.md index cd838cb..5ad6253 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # pyjsparser Fast JavaScript parser - manual translation of esprima.js to python. Takes 1 second to parse whole angular.js library so parsing speed is about 100k characters per second which makes it the fastest and most comprehensible JavaScript parser for python out there. -Supports whole ECMAScript 5.1 and parts of ECMAScript 6. +Supports whole ECMAScript 5.1 and parts of ECMAScript 6. If you need full ECMA 6 support, I recommend trying out [this](https://github.com/Kronuz/esprima-python) parser by Kronuz. # Installation diff --git a/pyjsparser/__init__.py b/pyjsparser/__init__.py index 2a9de69..a90fc4a 100644 --- a/pyjsparser/__init__.py +++ b/pyjsparser/__init__.py @@ -1,4 +1,4 @@ __all__ = ['PyJsParser', 'parse', 'JsSyntaxError'] __author__ = 'Piotr Dabkowski' -__version__ = '2.2.0' +__version__ = '2.5.2' from .parser import PyJsParser, parse, JsSyntaxError \ No newline at end of file diff --git a/pyjsparser/parser.py b/pyjsparser/parser.py index 71a8e39..7360b23 100644 --- a/pyjsparser/parser.py +++ b/pyjsparser/parser.py @@ -94,9 +94,16 @@ def skipSingleLineComment(self, offset): self.lineNumber += 1 self.hasLineTerminator = True self.lineStart = self.index - return + return { + 'type': 'Line', + 'value': self.source[start + offset:self.index-2], + 'leading': True, + 'trailing': False, + 'loc': None, + } def skipMultiLineComment(self): + start = self.index while self.index < self.length: ch = ord(self.source[self.index]) if isLineTerminator(ch): @@ -110,7 +117,13 @@ def skipMultiLineComment(self): # Block comment ends with '*/'. 
if ord(self.source[self.index + 1]) == 0x2F: self.index += 2 - return + return { + 'type': 'Block', + 'value': self.source[start:self.index-2], + 'leading': True, + 'trailing': False, + 'loc': None, + } self.index += 1 else: self.index += 1 @@ -118,7 +131,9 @@ def skipMultiLineComment(self): def skipComment(self): self.hasLineTerminator = False + startIndex = self.index start = (self.index == 0) + comments = [] while self.index < self.length: ch = ord(self.source[self.index]) if isWhiteSpace(ch): @@ -135,11 +150,11 @@ def skipComment(self): ch = ord(self.source[self.index + 1]) if (ch == 0x2F): self.index += 2 - self.skipSingleLineComment(2) + comments.append(self.skipSingleLineComment(2)) start = True elif (ch == 0x2A): # U+002A is '*' self.index += 2 - self.skipMultiLineComment() + comments.append(self.skipMultiLineComment()) else: break elif (start and ch == 0x2D): # U+002D is '-' @@ -159,6 +174,7 @@ def skipComment(self): break else: break + return filter(None, comments) def scanHexEscape(self, prefix): code = 0 @@ -828,10 +844,10 @@ def scanRegExpFlags(self): 'value': flags, 'literal': st} - def scanRegExp(self): + def scanRegExp(self, comments): self.scanning = True self.lookahead = None - self.skipComment() + comments.extend(self.skipComment()) start = self.index body = self.scanRegExpBody() @@ -846,17 +862,22 @@ def scanRegExp(self): 'flags': flags['value'] }, 'start': start, - 'end': self.index} + 'end': self.index, + 'comments': comments} def collectRegex(self): - self.skipComment(); - return self.scanRegExp() + return self.scanRegExp(self.skipComment()) def isIdentifierName(self, token): return token['type'] in (1, 3, 4, 5) # def advanceSlash(self): ??? 
+ def advanceWithComments(self, comments): + token = self.advance() + token['comments'] = comments + return token + def advance(self): if (self.index >= self.length): return { @@ -933,7 +954,7 @@ def lex(self): self.lastLineNumber = self.lineNumber self.lastLineStart = self.lineStart - self.skipComment() + comments = self.skipComment() token = self.lookahead @@ -941,14 +962,14 @@ def lex(self): self.startLineNumber = self.lineNumber self.startLineStart = self.lineStart - self.lookahead = self.advance() + self.lookahead = self.advanceWithComments(comments) self.scanning = False return token def peek(self): self.scanning = True - self.skipComment() + comments = self.skipComment() self.lastIndex = self.index self.lastLineNumber = self.lineNumber @@ -958,7 +979,7 @@ def peek(self): self.startLineNumber = self.lineNumber self.startLineStart = self.lineStart - self.lookahead = self.advance() + self.lookahead = self.advanceWithComments(comments) self.scanning = False def createError(self, line, pos, description): @@ -1362,6 +1383,7 @@ def checkProto(self, key, computed, hasProto): def parseObjectProperty(self, hasProto): token = self.lookahead node = Node() + node.comments = self.lookahead.get('comments', []) computed = self.match('['); key = self.parseObjectPropertyKey(); @@ -1393,9 +1415,8 @@ def parseObjectInitialiser(self): properties = [] hasProto = {'value': false} node = Node(); - + node.comments = self.lookahead.get('comments', []) self.expect('{'); - while (not self.match('}')): properties.append(self.parseObjectProperty(hasProto)); @@ -1531,6 +1552,7 @@ def parsePrimaryExpression(self): typ = self.lookahead['type'] node = Node(); + node.comments = self.lookahead.get('comments', []) if (typ == Token.Identifier): expr = node.finishIdentifier(self.lex()['value']); @@ -1562,7 +1584,7 @@ def parsePrimaryExpression(self): elif (self.match('/') or self.match('/=')): self.isAssignmentTarget = self.isBindingElement = false; self.index = self.startIndex; - token = 
self.scanRegExp(); # hehe, here you are! + token = self.scanRegExp([]); # hehe, here you are! self.lex(); expr = node.finishLiteral(token); elif (typ == Token.Template): @@ -2029,7 +2051,6 @@ def parseVariableIdentifier(self): def parseVariableDeclaration(self): init = null node = Node(); - d = self.parsePattern(); # 12.2.1 @@ -2058,7 +2079,6 @@ def parseVariableDeclarationList(self): def parseVariableStatement(self, node): self.expectKeyword('var') - declarations = self.parseVariableDeclarationList() self.consumeSemicolon() @@ -2527,6 +2547,7 @@ def parseStatement(self): self.isAssignmentTarget = self.isBindingElement = true; node = Node(); + node.comments = self.lookahead.get('comments', []) val = self.lookahead['value'] if (typ == Token.Punctuator): @@ -2703,6 +2724,7 @@ def parseParams(self, firstRestricted): 'message': options.get('message')} def parseFunctionDeclaration(self, node, identifierIsOptional=None): + node.comments = self.lookahead.get('comments', []) d = null params = [] defaults = [] @@ -2748,6 +2770,7 @@ def parseFunctionExpression(self): params = [] defaults = [] node = Node(); + node.comments = self.lookahead.get('comments', []) firstRestricted = None message = None diff --git a/pyjsparser/std_nodes.py b/pyjsparser/std_nodes.py index a4b37ca..bee67b8 100644 --- a/pyjsparser/std_nodes.py +++ b/pyjsparser/std_nodes.py @@ -451,7 +451,6 @@ def __getitem__(self, item): def __setitem__(self, key, value): setattr(self, key, value) - class Node(BaseNode): pass