From 8ec3d15bcc474de2280f3229d245054cc90eeca8 Mon Sep 17 00:00:00 2001 From: Samson Danziger Date: Tue, 1 Dec 2020 01:36:56 +0000 Subject: [PATCH 1/6] Blackify code --- README.md | 4 ++ lsif_indexer/analysis.py | 8 +-- lsif_indexer/consts.py | 8 +-- lsif_indexer/emitter.py | 50 ++++++++--------- lsif_indexer/index.py | 116 ++++++++++++++++----------------------- lsif_indexer/script.py | 33 +++++++---- pyproject.toml | 3 + setup.py | 2 +- 8 files changed, 106 insertions(+), 118 deletions(-) create mode 100644 pyproject.toml diff --git a/README.md b/README.md index 4c4887e..89a6220 100644 --- a/README.md +++ b/README.md @@ -53,3 +53,7 @@ brew install python@3 ``` You may need to write `pip3` instead of `pip` to get the correct version. + +## Styling + +[Black](https://github.com/psf/black) is used for code formatting and is configured in `pyproject.toml`. \ No newline at end of file diff --git a/lsif_indexer/analysis.py b/lsif_indexer/analysis.py index b78cbe3..9e62200 100644 --- a/lsif_indexer/analysis.py +++ b/lsif_indexer/analysis.py @@ -6,6 +6,7 @@ class Name: An object that represents a reference or definition of a variable in a particular source file. """ + def __init__(self, definition): self.definition = definition @@ -51,11 +52,6 @@ def get_names(source, filename): """ Retrieve a list of Name objects for the given source. """ - definitions = jedi.names( - source, - path=filename, - all_scopes=True, - references=True, - ) + definitions = jedi.names(source, path=filename, all_scopes=True, references=True) return [Name(d) for d in definitions] diff --git a/lsif_indexer/consts.py b/lsif_indexer/consts.py index a45c676..aff85e9 100644 --- a/lsif_indexer/consts.py +++ b/lsif_indexer/consts.py @@ -1,6 +1,6 @@ -INDEXER_VERSION = '0.1.0' -POSITION_ENCODING = 'utf-16' -PROTOCOL_VERSION = '0.4.0' +INDEXER_VERSION = "0.1.0" +POSITION_ENCODING = "utf-16" +PROTOCOL_VERSION = "0.4.0" -INDENT = ' ' +INDENT = " " MAX_HIGHLIGHT_RANGE = 40 diff --git a/lsif_indexer/emitter.py b/lsif_indexer/emitter.py index babb08b..3e823e6 100644 --- a/lsif_indexer/emitter.py +++ b/lsif_indexer/emitter.py @@ -10,6 +10,7 @@ class Emitter: type. The majority of the methods in this class definition are added dynamically via setattr (below). """ + def __init__(self, writer): self.writer = writer self._lines = 0 @@ -22,7 +23,7 @@ def emit(self, **kwargs): """ node_id = self._lines + 1 self._lines += 1 - self.writer.write({'id': node_id, **kwargs}) + self.writer.write({"id": node_id, **kwargs}) return node_id @@ -30,48 +31,50 @@ class FileWriter: """ FileWriter writes LSIF-dump data to the given file. """ + def __init__(self, file): self.file = file def write(self, data): - self.file.write(json.dumps(data, separators=(',', ':')) + '\n') + self.file.write(json.dumps(data, separators=(",", ":")) + "\n") class DBWriter: """ DBWriter writes LSIF-dump data into a SQLite database. """ + def __init__(self): pass def write(self, data): # TODO(efritz) - implement - raise RuntimeError('Unimplemented') + raise RuntimeError("Unimplemented") # A map from vertex labels to the fields they support. Fields # are ordered based on their positional argument construction. VERTEX_FIELDS = { - '$event': ['kind', 'scope', 'data'], - 'definitionResult': [], - 'document': ['languageId', 'uri', 'contents'], - 'hoverResult': ['result'], - 'metaData': ['version', 'positionEncoding', 'projectRoot'], - 'project': ['kind'], - 'range': ['start', 'end'], - 'referenceResult': [], - 'resultSet': [], + "$event": ["kind", "scope", "data"], + "definitionResult": [], + "document": ["languageId", "uri", "contents"], + "hoverResult": ["result"], + "metaData": ["version", "positionEncoding", "projectRoot"], + "project": ["kind"], + "range": ["start", "end"], + "referenceResult": [], + "resultSet": [], } # A map from edge labels to the fields they support. Fields # are ordered based on their positional argument construction. EDGE_FIELDS = { - 'contains': ['outV', 'inVs'], - 'item': ['outV', 'inVs', 'document', 'property'], - 'next': ['outV', 'inV'], - 'textDocument/definition': ['outV', 'inV'], - 'textDocument/hover': ['outV', 'inV'], - 'textDocument/references': ['outV', 'inV'], + "contains": ["outV", "inVs"], + "item": ["outV", "inVs", "document", "property"], + "next": ["outV", "inV"], + "textDocument/definition": ["outV", "inV"], + "textDocument/hover": ["outV", "inV"], + "textDocument/references": ["outV", "inV"], } @@ -81,21 +84,18 @@ def add_emitters(): edge type described above. The values for each field is supplied positionally and are optional. """ + def make_emitter(type_name, name, fields): def emitter(self, *args): - return self.emit( - type=type_name, - label=name, - **dict(zip(fields, args)), - ) + return self.emit(type=type_name, label=name, **dict(zip(fields, args))) return emitter - for type_name, field_map in [('vertex', VERTEX_FIELDS), ('edge', EDGE_FIELDS)]: + for type_name, field_map in [("vertex", VERTEX_FIELDS), ("edge", EDGE_FIELDS)]: for name, fields in field_map.items(): setattr( Emitter, - 'emit_{}'.format(name.replace('$', '').replace('/', '_').lower()), + "emit_{}".format(name.replace("$", "").replace("/", "_").lower()), make_emitter(type_name, name, fields), ) diff --git a/lsif_indexer/index.py b/lsif_indexer/index.py index 6a53657..85cafac 100644 --- a/lsif_indexer/index.py +++ b/lsif_indexer/index.py @@ -2,12 +2,9 @@ import contextlib import os -from .analysis import get_names -from .emitter import Emitter, FileWriter -from .consts import ( - INDENT, MAX_HIGHLIGHT_RANGE, - POSITION_ENCODING, PROTOCOL_VERSION, -) +from lsif_indexer.analysis import get_names +from lsif_indexer.emitter import Emitter, FileWriter +from lsif_indexer.consts import INDENT, MAX_HIGHLIGHT_RANGE, POSITION_ENCODING, PROTOCOL_VERSION class DefinitionMeta: @@ -16,6 +13,7 @@ class DefinitionMeta: This contains previously generated identifiers needed when later linking a name reference to its definition. """ + def __init__(self, range_id, result_set_id, contents): self.range_id = range_id self.result_set_id = result_set_id @@ -31,6 +29,7 @@ class FileIndexer: on a per-file basis, this class holds the majority of the indexer logic. """ + def __init__(self, filename, emitter, project_id, verbose, exclude_content): self.filename = filename self.emitter = emitter @@ -40,24 +39,21 @@ def __init__(self, filename, emitter, project_id, verbose, exclude_content): self.definition_metas = {} def index(self): - print('Indexing file {}'.format(self.filename)) + print("Indexing file {}".format(self.filename)) with open(self.filename) as f: source = f.read() - self.source_lines = source.split('\n') + self.source_lines = source.split("\n") - document_args = [ - 'py', - 'file://{}'.format(os.path.abspath(self.filename)), - ] + document_args = ["py", "file://{}".format(os.path.abspath(self.filename))] if not self.exclude_content: - encoded = base64.b64encode(source.encode('utf-8')).decode() + encoded = base64.b64encode(source.encode("utf-8")).decode() document_args.append(encoded) self.document_id = self.emitter.emit_document(*document_args) - with scope_events(self.emitter, 'document', self.document_id): + with scope_events(self.emitter, "document", self.document_id): self._index(source) def _index(self, source): @@ -68,7 +64,7 @@ def _index(self, source): self.names = get_names(source, self.filename) if self.verbose: - print('{}Searching for defs'.format(INDENT)) + print("{}Searching for defs".format(INDENT)) # First emit everything for names defined in this # file. This needs to be done first as edges need @@ -80,7 +76,7 @@ def _index(self, source): self._export_definition(name) if self.verbose: - print('{}Searching for uses'.format(INDENT)) + print("{}Searching for uses".format(INDENT)) # Next, we can emit uses. Some of these names may # reference a definition from another file or a @@ -108,10 +104,7 @@ def _export_definition(self, name): with the generated LSIF identifiers and make it queryable by the same definition object. """ - contents = [{ - 'language': 'py', - 'value': extract_text(self.source_lines, name), - }] + contents = [{"language": "py", "value": extract_text(self.source_lines, name)}] docstring = name.docstring if docstring: @@ -119,7 +112,7 @@ def _export_definition(self, name): # Emit hover tooltip and link it to a result set so that we can # re-use the same node for hover tooltips on usages. - hover_id = self.emitter.emit_hoverresult({'contents': contents}) + hover_id = self.emitter.emit_hoverresult({"contents": contents}) result_set_id = self.emitter.emit_resultset() self.emitter.emit_textdocument_hover(result_set_id, hover_id) @@ -129,11 +122,7 @@ def _export_definition(self, name): # Stash the identifiers generated above so we can use then # when exporting related uses. - self.definition_metas[name] = DefinitionMeta( - range_id, - result_set_id, - contents, - ) + self.definition_metas[name] = DefinitionMeta(range_id, result_set_id, contents) # Print progress self._debug_def(name) @@ -148,7 +137,7 @@ def _export_uses(self, name): definitions = name.definitions() except Exception as ex: raise - print('Failed to retrieve definitions: {}'.format(str(ex))) + print("Failed to retrieve definitions: {}".format(str(ex))) return for definition in definitions: @@ -199,18 +188,10 @@ def _link_uses(self, name, meta): result_id = self.emitter.emit_referenceresult() self.emitter.emit_textdocument_references(meta.result_set_id, result_id) - self.emitter.emit_item( - result_id, - [meta.range_id], - self.document_id, - 'definitions', - ) + self.emitter.emit_item(result_id, [meta.range_id], self.document_id, "definitions") self.emitter.emit_item( - result_id, - sorted(list(meta.reference_range_ids)), - self.document_id, - 'references', + result_id, sorted(list(meta.reference_range_ids)), self.document_id, "references" ) def _emit_contains(self): @@ -240,23 +221,27 @@ def _debug_def(self, name): if not self.verbose: return - print('{}Def #{}, line {}: {}'.format( - INDENT * 2, - self.definition_metas.get(name).range_id, - name.line + 1, - highlight_range(self.source_lines, name).strip()), + print( + "{}Def #{}, line {}: {}".format( + INDENT * 2, + self.definition_metas.get(name).range_id, + name.line + 1, + highlight_range(self.source_lines, name).strip(), + ) ) def _debug_use(self, name, definition): if not self.verbose or name == definition: return - print('{}Use of #{}, line {}: {}'.format( - INDENT * 2, - self.definition_metas.get(definition).range_id, - name.line + 1, - highlight_range(self.source_lines, name), - )) + print( + "{}Use of #{}, line {}: {}".format( + INDENT * 2, + self.definition_metas.get(definition).range_id, + name.line + 1, + highlight_range(self.source_lines, name), + ) + ) def index(workspace, writer, verbose, exclude_content): @@ -265,50 +250,41 @@ def index(workspace, writer, verbose, exclude_content): write the analysis of each source file as an LSIF-dump to the given file writer. """ - uri = 'file://{}'.format(os.path.abspath(workspace)) + uri = "file://{}".format(os.path.abspath(workspace)) emitter = Emitter(FileWriter(writer)) emitter.emit_metadata(PROTOCOL_VERSION, POSITION_ENCODING, uri) - project_id = emitter.emit_project('py') + project_id = emitter.emit_project("py") - with scope_events(emitter, 'project', project_id): + with scope_events(emitter, "project", project_id): file_count = 0 for root, dirs, files in os.walk(workspace): for file in files: _, ext = os.path.splitext(file) - if ext != '.py': + if ext != ".py": continue file_count += 1 path = os.path.join(root, file) - FileIndexer( - path, - emitter, - project_id, - verbose, - exclude_content, - ).index() + FileIndexer(path, emitter, project_id, verbose, exclude_content).index() if file_count == 0: - print('No files found to index') + print("No files found to index") @contextlib.contextmanager def scope_events(emitter, scope, id): - emitter.emit_event('begin', scope, id) + emitter.emit_event("begin", scope, id) yield - emitter.emit_event('end', scope, id) + emitter.emit_event("end", scope, id) def make_ranges(name): """ Return a start and end range values for a range vertex. """ - return ( - {'line': name.line, 'character': name.lo}, - {'line': name.line, 'character': name.hi}, - ) + return ({"line": name.line, "character": name.lo}, {"line": name.line, "character": name.hi}) def extract_text(source_lines, name): @@ -334,7 +310,7 @@ def highlight_range(source_lines, name): # to be a bit more careful to maintain the correct range # of the highlighted region relative to the line. - while line and line[0] in [' ', '\t']: + while line and line[0] in [" ", "\t"]: line = line[1:] lo, hi = lo - 1, hi - 1 @@ -357,10 +333,10 @@ def highlight_range(source_lines, name): line = line[:-1].rstrip() trimmed_hi = True - return '{}{}\033[4;31m{}\033[0m{}{}'.format( - '... ' if trimmed_lo else '', + return "{}{}\033[4;31m{}\033[0m{}{}".format( + "... " if trimmed_lo else "", line[:lo].lstrip(), line[lo:hi], line[hi:].rstrip(), - ' ...' if trimmed_hi else '', + " ..." if trimmed_hi else "", ) diff --git a/lsif_indexer/script.py b/lsif_indexer/script.py index f84fe3d..db82c0d 100644 --- a/lsif_indexer/script.py +++ b/lsif_indexer/script.py @@ -1,7 +1,7 @@ import argparse import time -from .consts import (INDEXER_VERSION, PROTOCOL_VERSION) +from .consts import INDEXER_VERSION, PROTOCOL_VERSION from .index import index @@ -9,21 +9,30 @@ def main(): args = parse_args() start = time.time() - with open(args.o, 'w+') as f: + with open(args.o, "w+") as f: index(args.workspace, f, args.verbose, args.exclude_content) - print('\nProcessed in {0:.2f}ms'.format((time.time() - start) * 1000)) + print("\nProcessed in {0:.2f}ms".format((time.time() - start) * 1000)) def parse_args(): - parser = argparse.ArgumentParser(description='lsif-py is an LSIF indexer for Python.') - parser.add_argument('workspace', help='set the path to the code, current directory by default') - parser.add_argument('-o', help='change the output file, "data.lsif" by default', default='data.lsif') - parser.add_argument('-v', '--verbose', action='store_true', help='Output verbose logs', default=False) - parser.add_argument('--exclude-content', action='store_true', help='Do not emit document content', default=False) - parser.add_argument('--version', action='version', version='Go LSIF indexer: {}, Protocol version: {}'.format( - INDEXER_VERSION, - PROTOCOL_VERSION, - )) + parser = argparse.ArgumentParser(description="lsif-py is an LSIF indexer for Python.") + parser.add_argument("workspace", help="set the path to the code, current directory by default") + parser.add_argument( + "-o", help='change the output file, "data.lsif" by default', default="data.lsif" + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="Output verbose logs", default=False + ) + parser.add_argument( + "--exclude-content", action="store_true", help="Do not emit document content", default=False + ) + parser.add_argument( + "--version", + action="version", + version="Go LSIF indexer: {}, Protocol version: {}".format( + INDEXER_VERSION, PROTOCOL_VERSION + ), + ) return parser.parse_args() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8af7cd2 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[tool.black] +line-length = 100 +skip-numeric-underscore-normalization = true \ No newline at end of file diff --git a/setup.py b/setup.py index 698e2aa..40cd273 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ author="Eric Fritz", author_email="eric@sourcegraph.com", description="Python LSIF Indexer", - entry_points={"console_scripts": ["lsif-py=lsif_indexer.script:main"],}, + entry_points={"console_scripts": ["lsif-py=lsif_indexer.script:main"]}, long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/sourcegraph/lsif-py", From ebe6e834dc8e189594186541fb6f808b0b0c9a24 Mon Sep 17 00:00:00 2001 From: Samson Danziger Date: Tue, 1 Dec 2020 01:55:45 +0000 Subject: [PATCH 2/6] Make more pythonic and use absolute imports --- lsif_indexer/emitter.py | 128 +++++++++++++++++++++------------------- lsif_indexer/script.py | 4 +- 2 files changed, 69 insertions(+), 63 deletions(-) diff --git a/lsif_indexer/emitter.py b/lsif_indexer/emitter.py index 3e823e6..74b8fad 100644 --- a/lsif_indexer/emitter.py +++ b/lsif_indexer/emitter.py @@ -1,4 +1,44 @@ import json +from typing import IO, Dict + +# A map from vertex labels to the fields they support. Fields +# are ordered based on their positional argument construction. +VERTEX_FIELDS = { + "$event": ["kind", "scope", "data"], + "definitionResult": [], + "document": ["languageId", "uri", "contents"], + "hoverResult": ["result"], + "metaData": ["version", "positionEncoding", "projectRoot"], + "project": ["kind"], + "range": ["start", "end"], + "referenceResult": [], + "resultSet": [], +} + +# A map from edge labels to the fields they support. Fields +# are ordered based on their positional argument construction. +EDGE_FIELDS = { + "contains": ["outV", "inVs"], + "item": ["outV", "inVs", "document", "property"], + "next": ["outV", "inV"], + "textDocument/definition": ["outV", "inV"], + "textDocument/hover": ["outV", "inV"], + "textDocument/references": ["outV", "inV"], +} + + +def _get_emitter_emit_name(base_name: str) -> str: + name = base_name.replace("$", "").replace("/", "_").lower() + return f"emit_{name}" + + +def _make_emitter(type_name, name, fields): + def emitter(self, *args): + return self.emit( + type=type_name, label=name, **dict(zip(fields, args)) + ) + + return emitter class Emitter: @@ -15,6 +55,20 @@ def __init__(self, writer): self.writer = writer self._lines = 0 + # Add an emit_ * method to the Emitter class for each vertex + # and edge type described above. The values for each field is + # supplied positionally and are optional. + for type_name, field_map in [ + ("vertex", VERTEX_FIELDS), + ("edge", EDGE_FIELDS), + ]: + for name, fields in field_map.items(): + setattr( + self, + _get_emitter_emit_name(name), + _make_emitter(type_name, name, fields), + ) + def emit(self, **kwargs): """ Create a vertex or a node with the given fields and append @@ -27,78 +81,30 @@ def emit(self, **kwargs): return node_id -class FileWriter: +class BaseWriter: + def write(self, data: IO): + raise NotImplementedError + + +class FileWriter(BaseWriter): """ FileWriter writes LSIF-dump data to the given file. """ - def __init__(self, file): + def __init__(self, file: IO): self.file = file - def write(self, data): - self.file.write(json.dumps(data, separators=(",", ":")) + "\n") + def write(self, data: Dict): + self.file.write( + json.dumps(data, separators=(",", ":")) + "\n" + ) -class DBWriter: +class DBWriter(BaseWriter): """ DBWriter writes LSIF-dump data into a SQLite database. """ - def __init__(self): - pass - def write(self, data): # TODO(efritz) - implement - raise RuntimeError("Unimplemented") - - -# A map from vertex labels to the fields they support. Fields -# are ordered based on their positional argument construction. -VERTEX_FIELDS = { - "$event": ["kind", "scope", "data"], - "definitionResult": [], - "document": ["languageId", "uri", "contents"], - "hoverResult": ["result"], - "metaData": ["version", "positionEncoding", "projectRoot"], - "project": ["kind"], - "range": ["start", "end"], - "referenceResult": [], - "resultSet": [], -} - -# A map from edge labels to the fields they support. Fields -# are ordered based on their positional argument construction. -EDGE_FIELDS = { - "contains": ["outV", "inVs"], - "item": ["outV", "inVs", "document", "property"], - "next": ["outV", "inV"], - "textDocument/definition": ["outV", "inV"], - "textDocument/hover": ["outV", "inV"], - "textDocument/references": ["outV", "inV"], -} - - -def add_emitters(): - """ - Add an emit_* method to the Emitter class for each vertex and - edge type described above. The values for each field is supplied - positionally and are optional. - """ - - def make_emitter(type_name, name, fields): - def emitter(self, *args): - return self.emit(type=type_name, label=name, **dict(zip(fields, args))) - - return emitter - - for type_name, field_map in [("vertex", VERTEX_FIELDS), ("edge", EDGE_FIELDS)]: - for name, fields in field_map.items(): - setattr( - Emitter, - "emit_{}".format(name.replace("$", "").replace("/", "_").lower()), - make_emitter(type_name, name, fields), - ) - - -# Meta-construct the Emitter class -add_emitters() + super().write(data) diff --git a/lsif_indexer/script.py b/lsif_indexer/script.py index db82c0d..2e5c380 100644 --- a/lsif_indexer/script.py +++ b/lsif_indexer/script.py @@ -1,8 +1,8 @@ import argparse import time -from .consts import INDEXER_VERSION, PROTOCOL_VERSION -from .index import index +from lsif_indexer.consts import INDEXER_VERSION, PROTOCOL_VERSION +from lsif_indexer.index import index def main(): From 55f85d50d61e431628e58b71edc9e645ebe314c9 Mon Sep 17 00:00:00 2001 From: Samson Danziger Date: Tue, 1 Dec 2020 02:44:29 +0000 Subject: [PATCH 3/6] Add typing --- lsif_indexer/analysis.py | 41 +++++++++++++----------- lsif_indexer/emitter.py | 35 +++++++------------- lsif_indexer/index.py | 69 +++++++++++++++++++++------------------- 3 files changed, 71 insertions(+), 74 deletions(-) diff --git a/lsif_indexer/analysis.py b/lsif_indexer/analysis.py index 9e62200..383aaff 100644 --- a/lsif_indexer/analysis.py +++ b/lsif_indexer/analysis.py @@ -1,4 +1,7 @@ -import jedi +from typing import List + +from jedi import Script +from jedi.api.classes import Definition class Name: @@ -7,51 +10,51 @@ class Name: variable in a particular source file. """ - def __init__(self, definition): + def __init__(self, definition: Definition): self.definition = definition - def is_definition(self): + def __eq__(self, other): + return self.definition == other.definition + + def __hash__(self): + return self.definition.__hash__() + + def is_definition(self) -> bool: """ Return true if this name is a definition or assignment. """ return self.definition.is_definition() - def definitions(self): + def definitions(self) -> List["Name"]: """ Get a list of Name objects which define or assign this particular usage of a variable reference. Generally, this will only include one object. If this name is a definition, the list will include itself. """ - return [Name(a) for a in self.definition.goto_assignments() if a != self.definition] + return [Name(a) for a in self.definition.goto() if a != self.definition] @property - def line(self): + def line(self) -> int: return self.definition.line - 1 @property - def lo(self): + def lo(self) -> int: return self.definition.column @property - def hi(self): + def hi(self) -> int: return self.definition.column + len(self.definition.name) @property - def docstring(self): + def docstring(self) -> str: return self.definition.docstring(raw=True, fast=False) - def __eq__(self, other): - return self.definition == other.definition - def __hash__(self): - return self.definition.__hash__() - - -def get_names(source, filename): +def get_names(source: str, filename: str) -> List[Name]: """ Retrieve a list of Name objects for the given source. """ - definitions = jedi.names(source, path=filename, all_scopes=True, references=True) - - return [Name(d) for d in definitions] + return [ + Name(d) for d in Script(source, path=filename).get_names(all_scopes=True, references=True) + ] diff --git a/lsif_indexer/emitter.py b/lsif_indexer/emitter.py index 74b8fad..79e6937 100644 --- a/lsif_indexer/emitter.py +++ b/lsif_indexer/emitter.py @@ -1,9 +1,9 @@ import json -from typing import IO, Dict +from typing import IO, Dict, List, Callable # A map from vertex labels to the fields they support. Fields # are ordered based on their positional argument construction. -VERTEX_FIELDS = { +VERTEX_FIELDS: Dict[str, List[str]] = { "$event": ["kind", "scope", "data"], "definitionResult": [], "document": ["languageId", "uri", "contents"], @@ -17,7 +17,7 @@ # A map from edge labels to the fields they support. Fields # are ordered based on their positional argument construction. -EDGE_FIELDS = { +EDGE_FIELDS: Dict[str, List[str]] = { "contains": ["outV", "inVs"], "item": ["outV", "inVs", "document", "property"], "next": ["outV", "inV"], @@ -32,11 +32,9 @@ def _get_emitter_emit_name(base_name: str) -> str: return f"emit_{name}" -def _make_emitter(type_name, name, fields): +def _make_emitter(type_name: str, name: str, fields: List[str]) -> Callable[[...], int]: def emitter(self, *args): - return self.emit( - type=type_name, label=name, **dict(zip(fields, args)) - ) + return self.emit(type=type_name, label=name, **dict(zip(fields, args))) return emitter @@ -51,25 +49,18 @@ class Emitter: added dynamically via setattr (below). """ - def __init__(self, writer): + def __init__(self, writer: "BaseWriter"): self.writer = writer self._lines = 0 # Add an emit_ * method to the Emitter class for each vertex # and edge type described above. The values for each field is # supplied positionally and are optional. - for type_name, field_map in [ - ("vertex", VERTEX_FIELDS), - ("edge", EDGE_FIELDS), - ]: + for type_name, field_map in [("vertex", VERTEX_FIELDS), ("edge", EDGE_FIELDS)]: for name, fields in field_map.items(): - setattr( - self, - _get_emitter_emit_name(name), - _make_emitter(type_name, name, fields), - ) + setattr(self, _get_emitter_emit_name(name), _make_emitter(type_name, name, fields)) - def emit(self, **kwargs): + def emit(self, **kwargs) -> int: """ Create a vertex or a node with the given fields and append it to the Emitter's output buffer. Generate and return a @@ -82,7 +73,7 @@ def emit(self, **kwargs): class BaseWriter: - def write(self, data: IO): + def write(self, data: Dict): raise NotImplementedError @@ -95,9 +86,7 @@ def __init__(self, file: IO): self.file = file def write(self, data: Dict): - self.file.write( - json.dumps(data, separators=(",", ":")) + "\n" - ) + self.file.write(json.dumps(data, separators=(",", ":")) + "\n") class DBWriter(BaseWriter): @@ -105,6 +94,6 @@ class DBWriter(BaseWriter): DBWriter writes LSIF-dump data into a SQLite database. """ - def write(self, data): + def write(self, data: Dict): # TODO(efritz) - implement super().write(data) diff --git a/lsif_indexer/index.py b/lsif_indexer/index.py index 85cafac..3e15756 100644 --- a/lsif_indexer/index.py +++ b/lsif_indexer/index.py @@ -1,12 +1,15 @@ import base64 import contextlib import os +from dataclasses import dataclass, field +from typing import List, Set, IO, Tuple, Dict -from lsif_indexer.analysis import get_names +from lsif_indexer.analysis import get_names, Name from lsif_indexer.emitter import Emitter, FileWriter from lsif_indexer.consts import INDENT, MAX_HIGHLIGHT_RANGE, POSITION_ENCODING, PROTOCOL_VERSION +@dataclass class DefinitionMeta: """ A bag of properties around a single source definition. @@ -14,14 +17,15 @@ class DefinitionMeta: when later linking a name reference to its definition. """ - def __init__(self, range_id, result_set_id, contents): - self.range_id = range_id - self.result_set_id = result_set_id - self.contents = contents - self.reference_range_ids = set() - self.definition_result_id = 0 + range_id: int + result_set_id: int + contents: List + reference_range_ids: Set = field(default_factory=set) + definition_result_id: int = 0 + +@dataclass class FileIndexer: """ Analysis the definitions and uses in the given file and @@ -30,13 +34,15 @@ class FileIndexer: indexer logic. """ - def __init__(self, filename, emitter, project_id, verbose, exclude_content): - self.filename = filename - self.emitter = emitter - self.project_id = project_id - self.verbose = verbose - self.exclude_content = exclude_content - self.definition_metas = {} + filename: str + emitter: Emitter + project_id: int + verbose: bool = False + exclude_content: bool = False + + source_lines: List[str] = None + document_id: int = None + definition_metas: Dict[Name, DefinitionMeta] = field(default_factory=dict) def index(self): print("Indexing file {}".format(self.filename)) @@ -56,7 +62,7 @@ def index(self): with scope_events(self.emitter, "document", self.document_id): self._index(source) - def _index(self, source): + def _index(self, source: str): # Do an initial analysis to get a list of names from # the source file. Some additional analysis may be # done lazily in later steps when needed. @@ -97,7 +103,7 @@ def _index(self, source): # Finally, link uses to their containing document self._emit_contains() - def _export_definition(self, name): + def _export_definition(self, name: Name): """ Emit vertices and edges related directly to the definition of or assignment to a variable. Create a definition meta object @@ -127,7 +133,7 @@ def _export_definition(self, name): # Print progress self._debug_def(name) - def _export_uses(self, name): + def _export_uses(self, name: Name): """ Emit vertices and edges related to any use of a definition. The definition must have already been exported by the above @@ -136,14 +142,13 @@ def _export_uses(self, name): try: definitions = name.definitions() except Exception as ex: + print(f"Failed to retrieve definitions: {ex}") raise - print("Failed to retrieve definitions: {}".format(str(ex))) - return for definition in definitions: self._export_use(name, definition) - def _export_use(self, name, definition): + def _export_use(self, name: Name, definition: Name): """ Emit vertices and edges directly related to a single use of a definition. @@ -178,7 +183,7 @@ def _export_use(self, name, definition): # Bookkeep this reference for the link procedure below meta.reference_range_ids.add(range_id) - def _link_uses(self, name, meta): + def _link_uses(self, name: Name, meta: DefinitionMeta): """ Emit vertices and edges related to the relationship between a definition and it use(s). @@ -217,7 +222,7 @@ def _emit_contains(self): # # Debugging Methods - def _debug_def(self, name): + def _debug_def(self, name: Name): if not self.verbose: return @@ -244,13 +249,13 @@ def _debug_use(self, name, definition): ) -def index(workspace, writer, verbose, exclude_content): +def index(workspace: str, writer: IO, verbose: bool, exclude_content: bool): """ Read each python file (recursively) in the given path and write the analysis of each source file as an LSIF-dump to the given file writer. """ - uri = "file://{}".format(os.path.abspath(workspace)) + uri = f"file://{os.path.abspath(workspace)}" emitter = Emitter(FileWriter(writer)) emitter.emit_metadata(PROTOCOL_VERSION, POSITION_ENCODING, uri) @@ -274,20 +279,20 @@ def index(workspace, writer, verbose, exclude_content): @contextlib.contextmanager -def scope_events(emitter, scope, id): - emitter.emit_event("begin", scope, id) +def scope_events(emitter: Emitter, scope: str, id_: int): + emitter.emit_event("begin", scope, id_) yield - emitter.emit_event("end", scope, id) + emitter.emit_event("end", scope, id_) -def make_ranges(name): +def make_ranges(name: Name) -> Tuple[Dict[str, int], Dict[str, int]]: """ Return a start and end range values for a range vertex. """ - return ({"line": name.line, "character": name.lo}, {"line": name.line, "character": name.hi}) + return {"line": name.line, "character": name.lo}, {"line": name.line, "character": name.hi} -def extract_text(source_lines, name): +def extract_text(source_lines: List[str], name: Name) -> str: """ Extract the text at the range described by the given name. """ @@ -295,7 +300,7 @@ def extract_text(source_lines, name): return source_lines[name.line].strip() -def highlight_range(source_lines, name): +def highlight_range(source_lines: List[str], name: Name) -> str: """ Return the source line where the name occurs with the range described by the name highlighted with an ANSI code. @@ -320,7 +325,7 @@ def highlight_range(source_lines, name): # string and leave the highlighted portion somewhere in the # middle. - while len(line) > MAX_HIGHLIGHT_RANGE and (hi - lo) < MAX_HIGHLIGHT_RANGE: + while len(line) > MAX_HIGHLIGHT_RANGE > (hi - lo): trimmable_lo = lo > 0 trimmable_hi = len(line) - hi - 1 From dfc02e486d4f6a772f68117335ccc57efc16b6ef Mon Sep 17 00:00:00 2001 From: Samson Danziger Date: Tue, 1 Dec 2020 03:19:48 +0000 Subject: [PATCH 4/6] Add real functions for emit_ --- lsif_indexer/emitter.py | 128 +++++++++++++++++++++++++--------------- lsif_indexer/index.py | 14 ++--- 2 files changed, 88 insertions(+), 54 deletions(-) diff --git a/lsif_indexer/emitter.py b/lsif_indexer/emitter.py index 79e6937..448bcbf 100644 --- a/lsif_indexer/emitter.py +++ b/lsif_indexer/emitter.py @@ -1,42 +1,11 @@ +from enum import Enum import json -from typing import IO, Dict, List, Callable - -# A map from vertex labels to the fields they support. Fields -# are ordered based on their positional argument construction. -VERTEX_FIELDS: Dict[str, List[str]] = { - "$event": ["kind", "scope", "data"], - "definitionResult": [], - "document": ["languageId", "uri", "contents"], - "hoverResult": ["result"], - "metaData": ["version", "positionEncoding", "projectRoot"], - "project": ["kind"], - "range": ["start", "end"], - "referenceResult": [], - "resultSet": [], -} - -# A map from edge labels to the fields they support. Fields -# are ordered based on their positional argument construction. -EDGE_FIELDS: Dict[str, List[str]] = { - "contains": ["outV", "inVs"], - "item": ["outV", "inVs", "document", "property"], - "next": ["outV", "inV"], - "textDocument/definition": ["outV", "inV"], - "textDocument/hover": ["outV", "inV"], - "textDocument/references": ["outV", "inV"], -} - - -def _get_emitter_emit_name(base_name: str) -> str: - name = base_name.replace("$", "").replace("/", "_").lower() - return f"emit_{name}" - - -def _make_emitter(type_name: str, name: str, fields: List[str]) -> Callable[[...], int]: - def emitter(self, *args): - return self.emit(type=type_name, label=name, **dict(zip(fields, args))) - - return emitter +from typing import IO, Dict + + +class EmitterNode(Enum): + vertex = "vertex" + edge = "edge" class Emitter: @@ -53,14 +22,7 @@ def __init__(self, writer: "BaseWriter"): self.writer = writer self._lines = 0 - # Add an emit_ * method to the Emitter class for each vertex - # and edge type described above. The values for each field is - # supplied positionally and are optional. - for type_name, field_map in [("vertex", VERTEX_FIELDS), ("edge", EDGE_FIELDS)]: - for name, fields in field_map.items(): - setattr(self, _get_emitter_emit_name(name), _make_emitter(type_name, name, fields)) - - def emit(self, **kwargs) -> int: + def emit(self, *, type: EmitterNode, label: str, **kwargs) -> int: """ Create a vertex or a node with the given fields and append it to the Emitter's output buffer. Generate and return a @@ -68,9 +30,81 @@ def emit(self, **kwargs) -> int: """ node_id = self._lines + 1 self._lines += 1 - self.writer.write({"id": node_id, **kwargs}) + self.writer.write({"id": node_id, "type": type.value, "label": label, **kwargs}) return node_id + # Vertex Emits + + def emit_event(self, kind, scope, data) -> int: + return self.emit(type=EmitterNode.vertex, label="event", kind=kind, scope=scope, data=data) + + def emit_definition_result(self) -> int: + return self.emit(type=EmitterNode.vertex, label="definitionResult") + + def emit_document(self, language_id, uri, contents) -> int: + return self.emit( + type=EmitterNode.vertex, + label="document", + languageId=language_id, + uri=uri, + contents=contents, + ) + + def emit_hover_result(self, result) -> int: + return self.emit(type=EmitterNode.vertex, label="hoverResult", result=result) + + def emit_metadata(self, version, position_encoding, project_root) -> int: + return self.emit( + type=EmitterNode.vertex, + label="metaData", + version=version, + positionEncoding=position_encoding, + projectRoot=project_root, + ) + + def emit_project(self, kind) -> int: + return self.emit(type=EmitterNode.vertex, label="project", kind=kind) + + def emit_range(self, start, end) -> int: + return self.emit(type=EmitterNode.vertex, label="range", start=start, end=end) + + def emit_reference_result(self) -> int: + return self.emit(type=EmitterNode.vertex, label="referenceResult") + + def emit_result_set(self) -> int: + return self.emit(type=EmitterNode.vertex, label="resultSet") + + # Edge Emits + + def emit_contains(self, out_v, in_vs) -> int: + return self.emit(type=EmitterNode.edge, label="contains", outV=out_v, inVs=in_vs) + + def emit_item(self, out_v, in_vs, document, property) -> int: + return self.emit( + type=EmitterNode.edge, + label="contains", + outV=out_v, + inVs=in_vs, + document=document, + property=property, + ) + + def emit_next(self, out_v, in_v) -> int: + return self.emit(type=EmitterNode.edge, label="contains", outV=out_v, inV=in_v) + + def emit_text_document_definition(self, out_v, in_v) -> int: + return self.emit( + type=EmitterNode.edge, label="textDocument/definition", outV=out_v, inV=in_v + ) + + def emit_text_document_hover(self, out_v, in_v) -> int: + return self.emit(type=EmitterNode.edge, label="textDocument/hover", outV=out_v, inV=in_v) + + def emit_text_document_references(self, out_v, in_v) -> int: + return self.emit( + type=EmitterNode.edge, label="textDocument/references", outV=out_v, inV=in_v + ) + class BaseWriter: def write(self, data: Dict): diff --git a/lsif_indexer/index.py b/lsif_indexer/index.py index 3e15756..dcc4847 100644 --- a/lsif_indexer/index.py +++ b/lsif_indexer/index.py @@ -118,9 +118,9 @@ def _export_definition(self, name: Name): # Emit hover tooltip and link it to a result set so that we can # re-use the same node for hover tooltips on usages. - hover_id = self.emitter.emit_hoverresult({"contents": contents}) - result_set_id = self.emitter.emit_resultset() - self.emitter.emit_textdocument_hover(result_set_id, hover_id) + hover_id = self.emitter.emit_hover_result({"contents": contents}) + result_set_id = self.emitter.emit_result_set() + self.emitter.emit_text_document_hover(result_set_id, hover_id) # Link result set to range range_id = self.emitter.emit_range(*make_ranges(name)) @@ -174,8 +174,8 @@ def _export_use(self, name: Name, definition: Name): self.emitter.emit_next(range_id, meta.result_set_id) if not meta.definition_result_id: - result_id = self.emitter.emit_definitionresult() - self.emitter.emit_textdocument_definition(meta.result_set_id, result_id) + result_id = self.emitter.emit_definition_result() + self.emitter.emit_text_document_definition(meta.result_set_id, result_id) meta.definition_result_id = result_id self.emitter.emit_item(meta.definition_result_id, [meta.range_id], self.document_id) @@ -191,8 +191,8 @@ def _link_uses(self, name: Name, meta: DefinitionMeta): if len(meta.reference_range_ids) == 0: return - result_id = self.emitter.emit_referenceresult() - self.emitter.emit_textdocument_references(meta.result_set_id, result_id) + result_id = self.emitter.emit_reference_result() + self.emitter.emit_text_document_references(meta.result_set_id, result_id) self.emitter.emit_item(result_id, [meta.range_id], self.document_id, "definitions") self.emitter.emit_item( From c67ce26059f53802cd5c2147788687038f7dc3da Mon Sep 17 00:00:00 2001 From: Samson Danziger Date: Tue, 1 Dec 2020 03:45:58 +0000 Subject: [PATCH 5/6] Add typing to real emit_ functions --- lsif_indexer/emitter.py | 44 ++++++++++++++++++++++++++++------------- lsif_indexer/index.py | 20 ++++++++++--------- requirements.txt | 1 + 3 files changed, 42 insertions(+), 23 deletions(-) diff --git a/lsif_indexer/emitter.py b/lsif_indexer/emitter.py index 448bcbf..00342d8 100644 --- a/lsif_indexer/emitter.py +++ b/lsif_indexer/emitter.py @@ -1,6 +1,6 @@ from enum import Enum import json -from typing import IO, Dict +from typing import IO, Dict, TypedDict, List, Optional class EmitterNode(Enum): @@ -8,6 +8,19 @@ class EmitterNode(Enum): edge = "edge" +class RangeValue(TypedDict): + line: int + character: int + + +class HoverResultValue(TypedDict): + class ContentValue(TypedDict): + language: str + value: str + + contents: List[ContentValue] + + class Emitter: """ Emitter writes LSIF-dump data to the given writer. The location to @@ -30,18 +43,19 @@ def emit(self, *, type: EmitterNode, label: str, **kwargs) -> int: """ node_id = self._lines + 1 self._lines += 1 - self.writer.write({"id": node_id, "type": type.value, "label": label, **kwargs}) + cleaned_kwargs = {key: value for key, value in kwargs.items() if value is not None} + self.writer.write({"id": node_id, "type": type.value, "label": label, **cleaned_kwargs}) return node_id # Vertex Emits - def emit_event(self, kind, scope, data) -> int: + def emit_event(self, kind: str, scope: str, data: int) -> int: return self.emit(type=EmitterNode.vertex, label="event", kind=kind, scope=scope, data=data) def emit_definition_result(self) -> int: return self.emit(type=EmitterNode.vertex, label="definitionResult") - def emit_document(self, language_id, uri, contents) -> int: + def emit_document(self, language_id: str, uri: str, contents: Optional[str] = None) -> int: return self.emit( type=EmitterNode.vertex, label="document", @@ -50,10 +64,10 @@ def emit_document(self, language_id, uri, contents) -> int: contents=contents, ) - def emit_hover_result(self, result) -> int: + def emit_hover_result(self, result: HoverResultValue) -> int: return self.emit(type=EmitterNode.vertex, label="hoverResult", result=result) - def emit_metadata(self, version, position_encoding, project_root) -> int: + def emit_metadata(self, version: str, position_encoding: str, project_root: str) -> int: return self.emit( type=EmitterNode.vertex, label="metaData", @@ -62,10 +76,10 @@ def emit_metadata(self, version, position_encoding, project_root) -> int: projectRoot=project_root, ) - def emit_project(self, kind) -> int: + def emit_project(self, kind: str) -> int: return self.emit(type=EmitterNode.vertex, label="project", kind=kind) - def emit_range(self, start, end) -> int: + def emit_range(self, start: RangeValue, end: RangeValue) -> int: return self.emit(type=EmitterNode.vertex, label="range", start=start, end=end) def emit_reference_result(self) -> int: @@ -76,10 +90,12 @@ def emit_result_set(self) -> int: # Edge Emits - def emit_contains(self, out_v, in_vs) -> int: + def emit_contains(self, out_v: int, in_vs: List[int]) -> int: return self.emit(type=EmitterNode.edge, label="contains", outV=out_v, inVs=in_vs) - def emit_item(self, out_v, in_vs, document, property) -> int: + def emit_item( + self, out_v: int, in_vs: List[int], document: int, property: Optional[str] = None + ) -> int: return self.emit( type=EmitterNode.edge, label="contains", @@ -89,18 +105,18 @@ def emit_item(self, out_v, in_vs, document, property) -> int: property=property, ) - def emit_next(self, out_v, in_v) -> int: + def emit_next(self, out_v: int, in_v: int) -> int: return self.emit(type=EmitterNode.edge, label="contains", outV=out_v, inV=in_v) - def emit_text_document_definition(self, out_v, in_v) -> int: + def emit_text_document_definition(self, out_v: int, in_v: int) -> int: return self.emit( type=EmitterNode.edge, label="textDocument/definition", outV=out_v, inV=in_v ) - def emit_text_document_hover(self, out_v, in_v) -> int: + def emit_text_document_hover(self, out_v: int, in_v: int) -> int: return self.emit(type=EmitterNode.edge, label="textDocument/hover", outV=out_v, inV=in_v) - def emit_text_document_references(self, out_v, in_v) -> int: + def emit_text_document_references(self, out_v: int, in_v: int) -> int: return self.emit( type=EmitterNode.edge, label="textDocument/references", outV=out_v, inV=in_v ) diff --git a/lsif_indexer/index.py b/lsif_indexer/index.py index dcc4847..8370e85 100644 --- a/lsif_indexer/index.py +++ b/lsif_indexer/index.py @@ -5,7 +5,7 @@ from typing import List, Set, IO, Tuple, Dict from lsif_indexer.analysis import get_names, Name -from lsif_indexer.emitter import Emitter, FileWriter +from lsif_indexer.emitter import Emitter, FileWriter, RangeValue from lsif_indexer.consts import INDENT, MAX_HIGHLIGHT_RANGE, POSITION_ENCODING, PROTOCOL_VERSION @@ -51,13 +51,15 @@ def index(self): source = f.read() self.source_lines = source.split("\n") - document_args = ["py", "file://{}".format(os.path.abspath(self.filename))] - - if not self.exclude_content: - encoded = base64.b64encode(source.encode("utf-8")).decode() - document_args.append(encoded) - - self.document_id = self.emitter.emit_document(*document_args) + self.document_id = self.emitter.emit_document( + language_id="py", + uri=f"file://{os.path.abspath(self.filename)}", + contents=( + base64.b64encode(source.encode("utf-8")).decode() + if not self.exclude_content + else None + ), + ) with scope_events(self.emitter, "document", self.document_id): self._index(source) @@ -285,7 +287,7 @@ def scope_events(emitter: Emitter, scope: str, id_: int): emitter.emit_event("end", scope, id_) -def make_ranges(name: Name) -> Tuple[Dict[str, int], Dict[str, int]]: +def make_ranges(name: Name) -> Tuple[RangeValue, RangeValue]: """ Return a start and end range values for a range vertex. """ diff --git a/requirements.txt b/requirements.txt index e435bbc..3f0babc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ jedi==0.16.0 +typing_extensions==3.7.4 From 91d4eedcd5b4daca009db65aba46c0e67a029869 Mon Sep 17 00:00:00 2001 From: Samson Danziger Date: Tue, 1 Dec 2020 03:47:27 +0000 Subject: [PATCH 6/6] Correct emitted label --- lsif_indexer/emitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lsif_indexer/emitter.py b/lsif_indexer/emitter.py index 00342d8..3fcdd57 100644 --- a/lsif_indexer/emitter.py +++ b/lsif_indexer/emitter.py @@ -50,7 +50,7 @@ def emit(self, *, type: EmitterNode, label: str, **kwargs) -> int: # Vertex Emits def emit_event(self, kind: str, scope: str, data: int) -> int: - return self.emit(type=EmitterNode.vertex, label="event", kind=kind, scope=scope, data=data) + return self.emit(type=EmitterNode.vertex, label="$event", kind=kind, scope=scope, data=data) def emit_definition_result(self) -> int: return self.emit(type=EmitterNode.vertex, label="definitionResult")