Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# pyjsparser
Fast JavaScript parser - manual translation of esprima.js to python. Takes 1 second to parse whole angular.js library so parsing speed is about 100k characters per second which makes it the fastest and most comprehensible JavaScript parser for python out there.

Supports whole ECMAScript 5.1 and parts of ECMAScript 6.
Supports whole ECMAScript 5.1 and parts of ECMAScript 6. If you need full ECMAScript 6 support, I recommend trying out [this](https://github.com/Kronuz/esprima-python) parser by Kronuz.

# Installation

Expand Down
2 changes: 1 addition & 1 deletion pyjsparser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__all__ = ['PyJsParser', 'parse', 'JsSyntaxError']
__author__ = 'Piotr Dabkowski'
__version__ = '2.2.0'
__version__ = '2.5.2'
from .parser import PyJsParser, parse, JsSyntaxError
59 changes: 41 additions & 18 deletions pyjsparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,16 @@ def skipSingleLineComment(self, offset):
self.lineNumber += 1
self.hasLineTerminator = True
self.lineStart = self.index
return
return {
'type': 'Line',
'value': self.source[start + offset:self.index-2],
'leading': True,
'trailing': False,
'loc': None,
}

def skipMultiLineComment(self):
start = self.index
while self.index < self.length:
ch = ord(self.source[self.index])
if isLineTerminator(ch):
Expand All @@ -110,15 +117,23 @@ def skipMultiLineComment(self):
# Block comment ends with '*/'.
if ord(self.source[self.index + 1]) == 0x2F:
self.index += 2
return
return {
'type': 'Block',
'value': self.source[start:self.index-2],
'leading': True,
'trailing': False,
'loc': None,
}
self.index += 1
else:
self.index += 1
self.tolerateUnexpectedToken()

def skipComment(self):
self.hasLineTerminator = False
startIndex = self.index
start = (self.index == 0)
comments = []
while self.index < self.length:
ch = ord(self.source[self.index])
if isWhiteSpace(ch):
Expand All @@ -135,11 +150,11 @@ def skipComment(self):
ch = ord(self.source[self.index + 1])
if (ch == 0x2F):
self.index += 2
self.skipSingleLineComment(2)
comments.append(self.skipSingleLineComment(2))
start = True
elif (ch == 0x2A): # U+002A is '*'
self.index += 2
self.skipMultiLineComment()
comments.append(self.skipMultiLineComment())
else:
break
elif (start and ch == 0x2D): # U+002D is '-'
Expand All @@ -159,6 +174,7 @@ def skipComment(self):
break
else:
break
return filter(None, comments)

def scanHexEscape(self, prefix):
code = 0
Expand Down Expand Up @@ -828,10 +844,10 @@ def scanRegExpFlags(self):
'value': flags,
'literal': st}

def scanRegExp(self):
def scanRegExp(self, comments):
self.scanning = True
self.lookahead = None
self.skipComment()
comments.extend(self.skipComment())
start = self.index

body = self.scanRegExpBody()
Expand All @@ -846,17 +862,22 @@ def scanRegExp(self):
'flags': flags['value']
},
'start': start,
'end': self.index}
'end': self.index,
'comments': comments}

def collectRegex(self):
    """Skip leading whitespace/comments, then scan a regular-expression literal.

    Returns:
        Whatever ``self.scanRegExp`` returns for the literal at the current
        position; the comments skipped here are handed to it.
    """
    # scanRegExp() calls ``comments.extend(...)``, so it needs a real list.
    # skipComment() returns ``filter(None, ...)``, which on Python 3 is an
    # iterator without ``extend``; materialize it before passing it on.
    return self.scanRegExp(list(self.skipComment()))

def isIdentifierName(self, token):
    """Return True when the token's type code is 1, 3, 4 or 5.

    NOTE(review): these codes presumably correspond to the boolean-literal,
    identifier, keyword and null-literal entries of the Token table --
    confirm against its definition.
    """
    kind = token['type']
    for accepted in (1, 3, 4, 5):
        if kind == accepted:
            return True
    return False

# def advanceSlash(self): ???

def advanceWithComments(self, comments):
    """Scan the next token and attach the preceding comments to it.

    Args:
        comments: iterable of comment records collected before the token
            (callers in this file pass the result of ``skipComment()``);
            ``None`` is treated as "no comments".

    Returns:
        The token dict produced by ``self.advance()``, with an added
        ``'comments'`` key holding a list.
    """
    token = self.advance()
    # skipComment() returns ``filter(None, ...)``, a one-shot iterator on
    # Python 3. Several parse methods read the same token's 'comments', so
    # store a real list that can be read more than once.
    token['comments'] = list(comments or ())
    return token

def advance(self):
if (self.index >= self.length):
return {
Expand Down Expand Up @@ -933,22 +954,22 @@ def lex(self):
self.lastLineNumber = self.lineNumber
self.lastLineStart = self.lineStart

self.skipComment()
comments = self.skipComment()

token = self.lookahead

self.startIndex = self.index
self.startLineNumber = self.lineNumber
self.startLineStart = self.lineStart

self.lookahead = self.advance()
self.lookahead = self.advanceWithComments(comments)
self.scanning = False
return token

def peek(self):
self.scanning = True

self.skipComment()
comments = self.skipComment()

self.lastIndex = self.index
self.lastLineNumber = self.lineNumber
Expand All @@ -958,7 +979,7 @@ def peek(self):
self.startLineNumber = self.lineNumber
self.startLineStart = self.lineStart

self.lookahead = self.advance()
self.lookahead = self.advanceWithComments(comments)
self.scanning = False

def createError(self, line, pos, description):
Expand Down Expand Up @@ -1362,6 +1383,7 @@ def checkProto(self, key, computed, hasProto):
def parseObjectProperty(self, hasProto):
token = self.lookahead
node = Node()
node.comments = self.lookahead.get('comments', [])

computed = self.match('[');
key = self.parseObjectPropertyKey();
Expand Down Expand Up @@ -1393,9 +1415,8 @@ def parseObjectInitialiser(self):
properties = []
hasProto = {'value': false}
node = Node();

node.comments = self.lookahead.get('comments', [])
self.expect('{');

while (not self.match('}')):
properties.append(self.parseObjectProperty(hasProto));

Expand Down Expand Up @@ -1531,6 +1552,7 @@ def parsePrimaryExpression(self):

typ = self.lookahead['type']
node = Node();
node.comments = self.lookahead.get('comments', [])

if (typ == Token.Identifier):
expr = node.finishIdentifier(self.lex()['value']);
Expand Down Expand Up @@ -1562,7 +1584,7 @@ def parsePrimaryExpression(self):
elif (self.match('/') or self.match('/=')):
self.isAssignmentTarget = self.isBindingElement = false;
self.index = self.startIndex;
token = self.scanRegExp(); # hehe, here you are!
token = self.scanRegExp([]); # hehe, here you are!
self.lex();
expr = node.finishLiteral(token);
elif (typ == Token.Template):
Expand Down Expand Up @@ -2029,7 +2051,6 @@ def parseVariableIdentifier(self):
def parseVariableDeclaration(self):
init = null
node = Node();

d = self.parsePattern();

# 12.2.1
Expand Down Expand Up @@ -2058,7 +2079,6 @@ def parseVariableDeclarationList(self):

def parseVariableStatement(self, node):
self.expectKeyword('var')

declarations = self.parseVariableDeclarationList()

self.consumeSemicolon()
Expand Down Expand Up @@ -2527,6 +2547,7 @@ def parseStatement(self):

self.isAssignmentTarget = self.isBindingElement = true;
node = Node();
node.comments = self.lookahead.get('comments', [])
val = self.lookahead['value']

if (typ == Token.Punctuator):
Expand Down Expand Up @@ -2703,6 +2724,7 @@ def parseParams(self, firstRestricted):
'message': options.get('message')}

def parseFunctionDeclaration(self, node, identifierIsOptional=None):
node.comments = self.lookahead.get('comments', [])
d = null
params = []
defaults = []
Expand Down Expand Up @@ -2748,6 +2770,7 @@ def parseFunctionExpression(self):
params = []
defaults = []
node = Node();
node.comments = self.lookahead.get('comments', [])
firstRestricted = None
message = None

Expand Down
1 change: 0 additions & 1 deletion pyjsparser/std_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,6 @@ def __getitem__(self, item):
# Item assignment (``node[key] = value``) is forwarded to attribute
# assignment, so the value is stored as the attribute named ``key``.
def __setitem__(self, key, value):
setattr(self, key, value)


class Node(BaseNode):
    """Plain node type that adds nothing of its own on top of BaseNode."""

Expand Down