From ec8f008e60e03ef2815e1f46b99169b5ac6867be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikko=20Lepp=C3=A4nen?= Date: Wed, 14 Jan 2026 14:41:50 +0200 Subject: [PATCH 1/2] feat(robot): add namespace disk caching with atomic writes and tests Add persistent namespace caching to significantly improve warm start performance. Cached namespaces are loaded from disk instead of being rebuilt from scratch. Key changes: - Add NamespaceMetaData and NamespaceCacheData frozen dataclasses for cache serialization with validation fields (mtime, content_hash, python_executable, sys_path_hash) - Add atomic cache writes using temp file + rename pattern - Add reverse dependency tracking for efficient library/variable change propagation (get_library_users, get_variables_users) - Skip content hash computation when mtime AND size match - Add ResourceMetaData for resource caching Tests: - Unit tests for PickleDataCache atomic writes (28 tests) - Unit tests for NamespaceMetaData and cached entries (20 tests) - Unit tests for ResourceMetaData cache keys (15 tests) - Integration tests for namespace caching behavior (11 tests) --- .../robotframework/parts/diagnostics.py | 127 ++-- .../robotcode/robot/diagnostics/data_cache.py | 26 +- .../diagnostics/document_cache_helper.py | 380 ++++++++++-- .../robot/diagnostics/imports_manager.py | 292 +++++++-- .../robotcode/robot/diagnostics/namespace.py | 568 +++++++++++++++++- .../parts/test_namespace_caching.py | 222 +++++++ .../robot/diagnostics/test_data_cache.py | 257 ++++++++ .../diagnostics/test_imports_manager_cache.py | 180 ++++++ .../robot/diagnostics/test_namespace_cache.py | 370 ++++++++++++ 9 files changed, 2263 insertions(+), 159 deletions(-) create mode 100644 tests/robotcode/language_server/robotframework/parts/test_namespace_caching.py create mode 100644 tests/robotcode/robot/diagnostics/test_data_cache.py create mode 100644 tests/robotcode/robot/diagnostics/test_imports_manager_cache.py create mode 100644 tests/robotcode/robot/diagnostics/test_namespace_cache.py diff --git a/packages/language_server/src/robotcode/language_server/robotframework/parts/diagnostics.py b/packages/language_server/src/robotcode/language_server/robotframework/parts/diagnostics.py index ffdf2b0d..1a06a90d 100644 --- a/packages/language_server/src/robotcode/language_server/robotframework/parts/diagnostics.py +++ b/packages/language_server/src/robotcode/language_server/robotframework/parts/diagnostics.py @@ -45,29 +45,35 @@ def __init__(self, parent: "RobotLanguageServerProtocol") -> None: self.parent.diagnostics.collect.add(self.collect_unused_keyword_references) self.parent.diagnostics.collect.add(self.collect_unused_variable_references) - self.parent.diagnostics.on_get_related_documents.add(self._on_get_related_documents) + self.parent.diagnostics.on_get_related_documents.add( + self._on_get_related_documents + ) def _on_initialized(self, sender: Any) -> None: self.parent.diagnostics.analyze.add(self.analyze) - self.parent.documents_cache.namespace_initialized(self._on_namespace_initialized) + self.parent.documents_cache.namespace_initialized( + self._on_namespace_initialized + ) self.parent.documents_cache.libraries_changed.add(self._on_libraries_changed) self.parent.documents_cache.variables_changed.add(self._on_variables_changed) def _on_libraries_changed(self, sender: Any, libraries: List[LibraryDoc]) -> None: - for doc in self.parent.documents.documents: - namespace = self.parent.documents_cache.get_only_initialized_namespace(doc) - if namespace is not None: - lib_docs = 
(e.library_doc for e in namespace.get_libraries().values()) - if any(lib_doc in lib_docs for lib_doc in libraries): - self.parent.diagnostics.force_refresh_document(doc) + docs_to_refresh: set[TextDocument] = set() + for lib_doc in libraries: + docs_to_refresh.update( + self.parent.documents_cache.get_library_users(lib_doc) + ) + for doc in docs_to_refresh: + self.parent.diagnostics.force_refresh_document(doc) def _on_variables_changed(self, sender: Any, variables: List[LibraryDoc]) -> None: - for doc in self.parent.documents.documents: - namespace = self.parent.documents_cache.get_only_initialized_namespace(doc) - if namespace is not None: - lib_docs = (e.library_doc for e in namespace.get_variables_imports().values()) - if any(lib_doc in lib_docs for lib_doc in variables): - self.parent.diagnostics.force_refresh_document(doc) + docs_to_refresh: set[TextDocument] = set() + for var_doc in variables: + docs_to_refresh.update( + self.parent.documents_cache.get_variables_users(var_doc) + ) + for doc in docs_to_refresh: + self.parent.diagnostics.force_refresh_document(doc) @language_id("robotframework") def analyze(self, sender: Any, document: TextDocument) -> None: @@ -79,41 +85,35 @@ def _on_namespace_initialized(self, sender: Any, namespace: Namespace) -> None: self.parent.diagnostics.force_refresh_document(namespace.document) @language_id("robotframework") - def _on_get_related_documents(self, sender: Any, document: TextDocument) -> Optional[List[TextDocument]]: + def _on_get_related_documents( + self, sender: Any, document: TextDocument + ) -> Optional[List[TextDocument]]: namespace = self.parent.documents_cache.get_only_initialized_namespace(document) if namespace is None: return None + source = str(document.uri.to_path()) + return self.parent.documents_cache.get_importers(source) - result = [] - - lib_doc = namespace.get_library_doc() - for doc in self.parent.documents.documents: - if doc.language_id != "robotframework": - continue - - doc_namespace = self.parent.documents_cache.get_only_initialized_namespace(doc) - if doc_namespace is None: - continue - - if doc_namespace.is_analyzed(): - for ref in doc_namespace.get_namespace_references(): - if ref.library_doc == lib_doc: - result.append(doc) - - return result - - def modify_diagnostics(self, document: TextDocument, diagnostics: List[Diagnostic]) -> List[Diagnostic]: - return self.parent.documents_cache.get_diagnostic_modifier(document).modify_diagnostics(diagnostics) + def modify_diagnostics( + self, document: TextDocument, diagnostics: List[Diagnostic] + ) -> List[Diagnostic]: + return self.parent.documents_cache.get_diagnostic_modifier( + document + ).modify_diagnostics(diagnostics) @language_id("robotframework") def collect_namespace_diagnostics( - self, sender: Any, document: TextDocument, diagnostics_type: DiagnosticsCollectType + self, + sender: Any, + document: TextDocument, + diagnostics_type: DiagnosticsCollectType, ) -> DiagnosticsResult: try: namespace = self.parent.documents_cache.get_namespace(document) return DiagnosticsResult( - self.collect_namespace_diagnostics, self.modify_diagnostics(document, namespace.get_diagnostics()) + self.collect_namespace_diagnostics, + self.modify_diagnostics(document, namespace.get_diagnostics()), ) except (CancelledError, SystemExit, KeyboardInterrupt): raise @@ -141,7 +141,10 @@ def collect_namespace_diagnostics( @language_id("robotframework") @_logger.call def collect_unused_keyword_references( - self, sender: Any, document: TextDocument, diagnostics_type: DiagnosticsCollectType + 
self, + sender: Any, + document: TextDocument, + diagnostics_type: DiagnosticsCollectType, ) -> DiagnosticsResult: config = self.parent.workspace.get_configuration(AnalysisConfig, document.uri) @@ -153,7 +156,9 @@ def collect_unused_keyword_references( return self._collect_unused_keyword_references(document) - def _collect_unused_keyword_references(self, document: TextDocument) -> DiagnosticsResult: + def _collect_unused_keyword_references( + self, document: TextDocument + ) -> DiagnosticsResult: try: namespace = self.parent.documents_cache.get_namespace(document) @@ -161,7 +166,9 @@ def _collect_unused_keyword_references(self, document: TextDocument) -> Diagnost for kw in (namespace.get_library_doc()).keywords.values(): check_current_task_canceled() - references = self.parent.robot_references.find_keyword_references(document, kw, False, True) + references = self.parent.robot_references.find_keyword_references( + document, kw, False, True + ) if not references: result.append( Diagnostic( @@ -174,7 +181,10 @@ def _collect_unused_keyword_references(self, document: TextDocument) -> Diagnost ) ) - return DiagnosticsResult(self.collect_unused_keyword_references, self.modify_diagnostics(document, result)) + return DiagnosticsResult( + self.collect_unused_keyword_references, + self.modify_diagnostics(document, result), + ) except (CancelledError, SystemExit, KeyboardInterrupt): raise except BaseException as e: @@ -200,7 +210,10 @@ def _collect_unused_keyword_references(self, document: TextDocument) -> Diagnost @language_id("robotframework") @_logger.call def collect_unused_variable_references( - self, sender: Any, document: TextDocument, diagnostics_type: DiagnosticsCollectType + self, + sender: Any, + document: TextDocument, + diagnostics_type: DiagnosticsCollectType, ) -> DiagnosticsResult: config = self.parent.workspace.get_configuration(AnalysisConfig, document.uri) @@ -208,11 +221,15 @@ def collect_unused_variable_references( return DiagnosticsResult(self.collect_unused_variable_references, []) if diagnostics_type != DiagnosticsCollectType.SLOW: - return DiagnosticsResult(self.collect_unused_variable_references, None, True) + return DiagnosticsResult( + self.collect_unused_variable_references, None, True + ) return self._collect_unused_variable_references(document) - def _collect_unused_variable_references(self, document: TextDocument) -> DiagnosticsResult: + def _collect_unused_variable_references( + self, document: TextDocument + ) -> DiagnosticsResult: try: namespace = self.parent.documents_cache.get_namespace(document) @@ -222,14 +239,25 @@ def _collect_unused_variable_references(self, document: TextDocument) -> Diagnos check_current_task_canceled() if isinstance( - var, (LibraryArgumentDefinition, EnvironmentVariableDefinition, GlobalVariableDefinition) + var, + ( + LibraryArgumentDefinition, + EnvironmentVariableDefinition, + GlobalVariableDefinition, + ), ): continue - if var.name_token is not None and var.name_token.value and var.name_token.value.startswith("_"): + if ( + var.name_token is not None + and var.name_token.value + and var.name_token.value.startswith("_") + ): continue - references = self.parent.robot_references.find_variable_references(document, var, False, True) + references = self.parent.robot_references.find_variable_references( + document, var, False, True + ) if not references: result.append( Diagnostic( @@ -243,7 +271,10 @@ def _collect_unused_variable_references(self, document: TextDocument) -> Diagnos ) ) - return 
DiagnosticsResult(self.collect_unused_variable_references, self.modify_diagnostics(document, result)) + return DiagnosticsResult( + self.collect_unused_variable_references, + self.modify_diagnostics(document, result), + ) except (CancelledError, SystemExit, KeyboardInterrupt): raise except BaseException as e: diff --git a/packages/robot/src/robotcode/robot/diagnostics/data_cache.py b/packages/robot/src/robotcode/robot/diagnostics/data_cache.py index bd7bc63d..7ad713b4 100644 --- a/packages/robot/src/robotcode/robot/diagnostics/data_cache.py +++ b/packages/robot/src/robotcode/robot/diagnostics/data_cache.py @@ -1,4 +1,6 @@ +import os import pickle +import tempfile from abc import ABC, abstractmethod from enum import Enum from pathlib import Path @@ -12,6 +14,8 @@ class CacheSection(Enum): LIBRARY = "libdoc" VARIABLES = "variables" + RESOURCE = "resource" + NAMESPACE = "namespace" class DataCache(ABC): @@ -85,5 +89,23 @@ def save_cache_data(self, section: CacheSection, entry_name: str, data: Any) -> cached_file = self.build_cache_data_filename(section, entry_name) cached_file.parent.mkdir(parents=True, exist_ok=True) - with cached_file.open("wb") as f: - pickle.dump(data, f) + + # Atomic write: write to temp file, then rename + # This ensures readers never see partial/corrupt data + temp_fd, temp_path = tempfile.mkstemp( + dir=cached_file.parent, + prefix=cached_file.stem + "_", + suffix=".tmp", + ) + try: + with os.fdopen(temp_fd, "wb") as f: + pickle.dump(data, f) + # Atomic rename (POSIX guarantees atomicity; Windows may fail if target exists) + Path(temp_path).replace(cached_file) + except Exception: + # Clean up temp file on failure (temp file may be left behind on SystemExit/KeyboardInterrupt) + try: + os.unlink(temp_path) + except OSError: + pass + raise diff --git a/packages/robot/src/robotcode/robot/diagnostics/document_cache_helper.py b/packages/robot/src/robotcode/robot/diagnostics/document_cache_helper.py index ed32591f..2ca76abb 100644 --- a/packages/robot/src/robotcode/robot/diagnostics/document_cache_helper.py +++ b/packages/robot/src/robotcode/robot/diagnostics/document_cache_helper.py @@ -1,7 +1,9 @@ from __future__ import annotations import ast +import hashlib import io +import sys import threading import weakref from logging import CRITICAL @@ -11,10 +13,6 @@ Callable, Iterable, Iterator, - List, - Optional, - Tuple, - Union, cast, ) @@ -31,9 +29,10 @@ from ..config.model import RobotBaseProfile from ..utils import get_robot_version from ..utils.stubs import Languages +from .data_cache import CacheSection from .imports_manager import ImportsManager from .library_doc import LibraryDoc -from .namespace import DocumentType, Namespace +from .namespace import DocumentType, Namespace, NamespaceCacheData, NamespaceMetaData from .workspace_config import ( AnalysisDiagnosticModifiersConfig, AnalysisRobotConfig, @@ -42,6 +41,9 @@ WorkspaceAnalysisConfig, ) +# Interval for cleaning up stale entries in dependency maps +_DEPENDENCY_CLEANUP_INTERVAL = 100 + class UnknownFileTypeError(Exception): pass @@ -59,8 +61,8 @@ def __init__( workspace: Workspace, documents_manager: DocumentsManager, file_watcher_manager: FileWatcherManagerBase, - robot_profile: Optional[RobotBaseProfile], - analysis_config: Optional[WorkspaceAnalysisConfig], + robot_profile: RobotBaseProfile | None, + analysis_config: WorkspaceAnalysisConfig | None, ) -> None: self.INITIALIZED_NAMESPACE = _CacheEntry() @@ -71,14 +73,36 @@ def __init__( self.robot_profile = robot_profile or RobotBaseProfile() self.analysis_config 
= analysis_config or WorkspaceAnalysisConfig() + # Lock ordering (to prevent deadlocks when acquiring multiple locks): + # 1. _imports_managers_lock + # 2. _importers_lock + # 3. _library_users_lock + # 4. _variables_users_lock + # Always acquire in this order if multiple locks are needed in the same operation. + self._imports_managers_lock = threading.RLock() self._imports_managers: weakref.WeakKeyDictionary[WorkspaceFolder, ImportsManager] = weakref.WeakKeyDictionary() - self._default_imports_manager: Optional[ImportsManager] = None - self._workspace_languages: weakref.WeakKeyDictionary[WorkspaceFolder, Optional[Languages]] = ( + self._default_imports_manager: ImportsManager | None = None + self._workspace_languages: weakref.WeakKeyDictionary[WorkspaceFolder, Languages | None] = ( weakref.WeakKeyDictionary() ) - def get_languages_for_document(self, document_or_uri: Union[TextDocument, Uri, str]) -> Optional[Languages]: + # Reverse dependency map: source path -> set of documents that import it + self._importers_lock = threading.RLock() + self._importers: dict[str, weakref.WeakSet[TextDocument]] = {} + + # Reverse dependency maps for libraries and variables (by source path for stable lookup) + # Using source path instead of id() because Python can reuse object IDs after GC + self._library_users_lock = threading.RLock() + self._library_users: dict[str, weakref.WeakSet[TextDocument]] = {} + + self._variables_users_lock = threading.RLock() + self._variables_users: dict[str, weakref.WeakSet[TextDocument]] = {} + + # Counter for periodic cleanup of stale dependency map entries + self._track_count = 0 + + def get_languages_for_document(self, document_or_uri: TextDocument | Uri | str) -> Languages | None: if get_robot_version() < (6, 0): return None @@ -86,7 +110,7 @@ def get_languages_for_document(self, document_or_uri: Union[TextDocument, Uri, s Languages as RobotLanguages, ) - uri: Union[Uri, str] + uri: Uri | str if isinstance(document_or_uri, TextDocument): uri = document_or_uri.uri @@ -129,7 +153,7 @@ def get_languages_for_document(self, document_or_uri: Union[TextDocument, Uri, s def build_languages_from_model( self, document: TextDocument, model: ast.AST - ) -> Tuple[Optional[Languages], Optional[Languages]]: + ) -> tuple[Languages | None, Languages | None]: if get_robot_version() < (6, 0): return (None, None) @@ -171,12 +195,12 @@ def __get_document_type(self, document: TextDocument) -> DocumentType: return DocumentType.UNKNOWN - def get_tokens(self, document: TextDocument, data_only: bool = False) -> List[Token]: + def get_tokens(self, document: TextDocument, data_only: bool = False) -> list[Token]: if data_only: return self.__get_tokens_data_only(document) return self.__get_tokens(document) - def __get_tokens_data_only(self, document: TextDocument) -> List[Token]: + def __get_tokens_data_only(self, document: TextDocument) -> list[Token]: document_type = self.get_document_type(document) if document_type == DocumentType.INIT: return self.get_init_tokens(document, True) @@ -187,7 +211,7 @@ def __get_tokens_data_only(self, document: TextDocument) -> List[Token]: raise UnknownFileTypeError(str(document.uri)) - def __get_tokens(self, document: TextDocument) -> List[Token]: + def __get_tokens(self, document: TextDocument) -> list[Token]: document_type = self.get_document_type(document) if document_type == DocumentType.INIT: return self.get_init_tokens(document) @@ -198,7 +222,7 @@ def __get_tokens(self, document: TextDocument) -> List[Token]: raise UnknownFileTypeError(str(document.uri)) - 
def get_general_tokens(self, document: TextDocument, data_only: bool = False) -> List[Token]: + def get_general_tokens(self, document: TextDocument, data_only: bool = False) -> list[Token]: if document.version is None: if data_only: return self.__get_general_tokens_data_only(document) @@ -266,28 +290,28 @@ def __internal_get_init_tokens( return robot.api.get_init_tokens(source, data_only=data_only, tokenize_variables=tokenize_variables) - def __get_general_tokens_data_only(self, document: TextDocument) -> List[Token]: + def __get_general_tokens_data_only(self, document: TextDocument) -> list[Token]: lang = self.get_languages_for_document(document) - def get(text: str) -> List[Token]: + def get(text: str) -> list[Token]: with io.StringIO(text) as content: return [e for e in self.__internal_get_tokens(content, True, lang=lang)] return self.__get_tokens_internal(document, get) - def __get_general_tokens(self, document: TextDocument) -> List[Token]: + def __get_general_tokens(self, document: TextDocument) -> list[Token]: lang = self.get_languages_for_document(document) - def get(text: str) -> List[Token]: + def get(text: str) -> list[Token]: with io.StringIO(text) as content: return [e for e in self.__internal_get_tokens(content, lang=lang)] return self.__get_tokens_internal(document, get) - def __get_tokens_internal(self, document: TextDocument, get: Callable[[str], List[Token]]) -> List[Token]: + def __get_tokens_internal(self, document: TextDocument, get: Callable[[str], list[Token]]) -> list[Token]: return get(document.text()) - def get_resource_tokens(self, document: TextDocument, data_only: bool = False) -> List[Token]: + def get_resource_tokens(self, document: TextDocument, data_only: bool = False) -> list[Token]: if document.version is None: if data_only: return self.__get_resource_tokens_data_only(document) @@ -299,25 +323,25 @@ def get_resource_tokens(self, document: TextDocument, data_only: bool = False) - return document.get_cache(self.__get_resource_tokens) - def __get_resource_tokens_data_only(self, document: TextDocument) -> List[Token]: + def __get_resource_tokens_data_only(self, document: TextDocument) -> list[Token]: lang = self.get_languages_for_document(document) - def get(text: str) -> List[Token]: + def get(text: str) -> list[Token]: with io.StringIO(text) as content: return [e for e in self.__internal_get_resource_tokens(content, True, lang=lang)] return self.__get_tokens_internal(document, get) - def __get_resource_tokens(self, document: TextDocument) -> List[Token]: + def __get_resource_tokens(self, document: TextDocument) -> list[Token]: lang = self.get_languages_for_document(document) - def get(text: str) -> List[Token]: + def get(text: str) -> list[Token]: with io.StringIO(text) as content: return [e for e in self.__internal_get_resource_tokens(content, lang=lang)] return self.__get_tokens_internal(document, get) - def get_init_tokens(self, document: TextDocument, data_only: bool = False) -> List[Token]: + def get_init_tokens(self, document: TextDocument, data_only: bool = False) -> list[Token]: if document.version is None: if data_only: return self.__get_init_tokens_data_only(document) @@ -328,19 +352,19 @@ def get_init_tokens(self, document: TextDocument, data_only: bool = False) -> Li return document.get_cache(self.__get_init_tokens_data_only) return document.get_cache(self.__get_init_tokens) - def __get_init_tokens_data_only(self, document: TextDocument) -> List[Token]: + def __get_init_tokens_data_only(self, document: TextDocument) -> list[Token]: lang = 
self.get_languages_for_document(document) - def get(text: str) -> List[Token]: + def get(text: str) -> list[Token]: with io.StringIO(text) as content: return [e for e in self.__internal_get_init_tokens(content, True, lang=lang)] return self.__get_tokens_internal(document, get) - def __get_init_tokens(self, document: TextDocument) -> List[Token]: + def __get_init_tokens(self, document: TextDocument) -> list[Token]: lang = self.get_languages_for_document(document) - def get(text: str) -> List[Token]: + def get(text: str) -> list[Token]: with io.StringIO(text) as content: return [e for e in self.__internal_get_init_tokens(content, lang=lang)] @@ -483,20 +507,277 @@ def __invalidate_namespace(self, sender: Namespace) -> None: def __namespace_initialized(self, sender: Namespace) -> None: if sender.document is not None: sender.document.set_data(self.INITIALIZED_NAMESPACE, sender) + + # Track reverse dependencies: record that this document imports its resources + self._track_imports(sender.document, sender) + + # Save to disk cache for faster restart (initial save without analysis data) + imports_manager = self.get_imports_manager(sender.document) + self._save_namespace_to_cache(sender, imports_manager) + self.namespace_initialized(self, sender) + def __namespace_analysed(self, sender: Namespace) -> None: + """Re-save namespace to cache after analysis to include diagnostics and analysis results.""" + if sender.document is not None: + imports_manager = self.get_imports_manager(sender.document) + self._save_namespace_to_cache(sender, imports_manager) + + def _track_imports(self, document: TextDocument, namespace: Namespace) -> None: + """Update the reverse dependency map for a namespace's imports.""" + with self._importers_lock: + # Track resource imports + for source in namespace.get_resources().keys(): + if source not in self._importers: + self._importers[source] = weakref.WeakSet() + self._importers[source].add(document) + + # Track library users (by source path for stable lookup) + with self._library_users_lock: + for entry in namespace.get_libraries().values(): + lib_key = entry.library_doc.source or entry.library_doc.name + if lib_key and lib_key not in self._library_users: + self._library_users[lib_key] = weakref.WeakSet() + if lib_key: + self._library_users[lib_key].add(document) + + # Track variables users (by source path for stable lookup) + with self._variables_users_lock: + for entry in namespace.get_variables_imports().values(): + var_key = entry.library_doc.source or entry.library_doc.name + if var_key and var_key not in self._variables_users: + self._variables_users[var_key] = weakref.WeakSet() + if var_key: + self._variables_users[var_key].add(document) + + # Periodically cleanup stale entries + self._track_count += 1 + if self._track_count >= _DEPENDENCY_CLEANUP_INTERVAL: + self._track_count = 0 + self._cleanup_stale_dependency_maps() + + def get_importers(self, source: str) -> list[TextDocument]: + """Get all documents that import a given source file (O(1) lookup).""" + with self._importers_lock: + if source in self._importers: + return list(self._importers[source]) + return [] + + def clear_importers(self, source: str) -> None: + """Clear the importers set for a source (called when source is modified).""" + with self._importers_lock: + if source in self._importers: + del self._importers[source] + + def get_library_users(self, library_doc: LibraryDoc) -> list[TextDocument]: + """Get all documents that use a given library (O(1) lookup by source path).""" + with 
self._library_users_lock: + lib_key = library_doc.source or library_doc.name + if lib_key and lib_key in self._library_users: + return list(self._library_users[lib_key]) + return [] + + def get_variables_users(self, variables_doc: LibraryDoc) -> list[TextDocument]: + """Get all documents that use a given variables file (O(1) lookup by source path).""" + with self._variables_users_lock: + var_key = variables_doc.source or variables_doc.name + if var_key and var_key in self._variables_users: + return list(self._variables_users[var_key]) + return [] + + def _cleanup_stale_dependency_maps(self) -> None: + """Remove entries with empty WeakSets from dependency maps. + + Called periodically to prevent memory accumulation from stale entries + where the object IDs have been reused after garbage collection. + """ + with self._importers_lock: + stale_importer_keys = [k for k, v in self._importers.items() if len(v) == 0] + for key in stale_importer_keys: + del self._importers[key] + + with self._library_users_lock: + stale_lib_keys = [k for k, v in self._library_users.items() if len(v) == 0] + for lib_key in stale_lib_keys: + del self._library_users[lib_key] + + with self._variables_users_lock: + stale_var_keys = [k for k, v in self._variables_users.items() if len(v) == 0] + for var_key in stale_var_keys: + del self._variables_users[var_key] + def get_initialized_namespace(self, document: TextDocument) -> Namespace: - result: Optional[Namespace] = document.get_data(self.INITIALIZED_NAMESPACE) + result: Namespace | None = document.get_data(self.INITIALIZED_NAMESPACE) if result is None: self._logger.debug(lambda: f"There is no initialized Namespace: {document.uri if document else None}") result = self.get_namespace(document) return result - def get_only_initialized_namespace(self, document: TextDocument) -> Optional[Namespace]: - return cast(Optional[Namespace], document.get_data(self.INITIALIZED_NAMESPACE)) + def get_only_initialized_namespace(self, document: TextDocument) -> Namespace | None: + return cast(Namespace | None, document.get_data(self.INITIALIZED_NAMESPACE)) + + def _try_load_namespace_from_cache( + self, + document: TextDocument, + model: ast.AST, + imports_manager: ImportsManager, + document_type: DocumentType | None, + languages: Languages | None, + workspace_languages: Languages | None, + ) -> Namespace | None: + """Attempt to load namespace from disk cache.""" + source = str(document.uri.to_path()) + source_path = Path(source) + + if not source_path.exists(): + return None + + try: + source_stat = source_path.stat() + current_mtime = source_stat.st_mtime_ns + current_size = source_stat.st_size + except OSError: + return None + + # Build cache filename using SHA256 for collision resistance + normalized = str(source_path.resolve()) + cache_key = hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:16] + cache_file = cache_key + ".cache" + + # Check if cache file exists + if not imports_manager.data_cache.cache_data_exists(CacheSection.NAMESPACE, cache_file): + return None + + # Load cache data (single file contains (meta, spec) tuple) + try: + saved_meta, cache_data = imports_manager.data_cache.read_cache_data( + CacheSection.NAMESPACE, cache_file, tuple + ) + except Exception: + self._logger.debug(lambda: f"Failed to read namespace cache for {source}", context_name="import") + return None + + # Type check the loaded data + if not isinstance(saved_meta, NamespaceMetaData) or not isinstance(cache_data, NamespaceCacheData): + self._logger.debug(lambda: f"Namespace cache type mismatch 
for {source}", context_name="import") + return None + + # Validate source file mtime + if saved_meta.mtime != current_mtime: + self._logger.debug(lambda: f"Namespace cache mtime mismatch for {source}", context_name="import") + return None + + # Fast path: if mtime AND size both match, skip expensive content hash computation + if saved_meta.file_size != current_size: + # Size changed - need content hash to validate + try: + _, current_hash = Namespace._compute_content_hash(source_path) + except OSError: + return None + + if saved_meta.content_hash != current_hash: + self._logger.debug( + lambda: f"Namespace cache content hash mismatch for {source}", context_name="import" + ) + return None + + # Validate environment identity (detects venv changes, PYTHONPATH changes, etc.) + if saved_meta.python_executable != sys.executable: + self._logger.debug( + lambda: f"Namespace cache Python executable mismatch for {source}", context_name="import" + ) + return None + + current_sys_path_hash = hashlib.sha256("\n".join(sys.path).encode("utf-8")).hexdigest()[:16] + if saved_meta.sys_path_hash != current_sys_path_hash: + self._logger.debug(lambda: f"Namespace cache sys.path hash mismatch for {source}", context_name="import") + return None + + # Validate all library source mtimes + for lib_source, lib_mtime in saved_meta.library_sources_mtimes: + lib_path = Path(lib_source) + try: + if not lib_path.exists() or lib_path.stat().st_mtime_ns != lib_mtime: + self._logger.debug( + lambda: f"Namespace cache library mtime mismatch for {lib_source}", context_name="import" + ) + return None + except OSError: + return None + + # Validate all resource source mtimes + for res_source, res_mtime in saved_meta.resource_sources_mtimes: + res_path = Path(res_source) + try: + if not res_path.exists() or res_path.stat().st_mtime_ns != res_mtime: + self._logger.debug( + lambda: f"Namespace cache resource mtime mismatch for {res_source}", context_name="import" + ) + return None + except OSError: + return None + + # Validate all variables source mtimes + for var_source, var_mtime in saved_meta.variables_sources_mtimes: + var_path = Path(var_source) + try: + if not var_path.exists() or var_path.stat().st_mtime_ns != var_mtime: + self._logger.debug( + lambda: f"Namespace cache variables mtime mismatch for {var_source}", context_name="import" + ) + return None + except OSError: + return None + + # Create namespace from cache data + result = Namespace.from_cache_data( + cache_data=cache_data, + imports_manager=imports_manager, + model=model, + source=source, + document=document, + document_type=document_type, + languages=languages, + workspace_languages=workspace_languages, + ) + + if result is not None: + self._logger.debug(lambda: f"Loaded namespace from cache for {source}", context_name="import") + + return result + + def _save_namespace_to_cache(self, namespace: Namespace, imports_manager: ImportsManager) -> None: + """Save initialized namespace to disk cache. + + Uses single-file format with atomic writes for consistency. + The cache file contains a (meta, spec) tuple. 
+ """ + if not namespace._initialized: + return + + meta = namespace.get_cache_metadata() + if meta is None: + return + + cache_data = namespace.to_cache_data() + + # Build cache filename using SHA256 for collision resistance + source_path = Path(namespace.source) + normalized = str(source_path.resolve()) + cache_key = hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:16] + cache_file = cache_key + ".cache" + + # Save as single tuple (meta, spec) - atomic and consistent + try: + imports_manager.data_cache.save_cache_data( + CacheSection.NAMESPACE, cache_file, (meta, cache_data) + ) + self._logger.debug(lambda: f"Saved namespace to cache for {namespace.source}", context_name="import") + except OSError: + self._logger.debug(lambda: f"Failed to save namespace cache for {namespace.source}", context_name="import") def __get_namespace_for_document_type( - self, document: TextDocument, document_type: Optional[DocumentType] + self, document: TextDocument, document_type: DocumentType | None ) -> Namespace: if document_type is not None and document_type == DocumentType.INIT: model = self.get_init_model(document) @@ -511,6 +792,20 @@ def __get_namespace_for_document_type( languages, workspace_languages = self.build_languages_from_model(document, model) + # Try loading from disk cache first + cached = self._try_load_namespace_from_cache( + document, model, imports_manager, document_type, languages, workspace_languages + ) + if cached is not None: + cached.has_invalidated.add(self.__invalidate_namespace) + cached.has_initialized.add(self.__namespace_initialized) + cached.has_analysed.add(self.__namespace_analysed) + # Mark as initialized in document data and track imports + document.set_data(self.INITIALIZED_NAMESPACE, cached) + self._track_imports(document, cached) + return cached + + # Cache miss - create new namespace result = Namespace( imports_manager, model, @@ -522,6 +817,7 @@ def __get_namespace_for_document_type( ) result.has_invalidated.add(self.__invalidate_namespace) result.has_initialized.add(self.__namespace_initialized) + result.has_analysed.add(self.__namespace_analysed) return result @@ -572,21 +868,21 @@ def create_imports_manager(self, root_uri: Uri) -> ImportsManager: return result @event - def libraries_changed(sender, libraries: List[LibraryDoc]) -> None: ... + def libraries_changed(sender, libraries: list[LibraryDoc]) -> None: ... @event - def resources_changed(sender, resources: List[LibraryDoc]) -> None: ... + def resources_changed(sender, resources: list[LibraryDoc]) -> None: ... @event - def variables_changed(sender, variables: List[LibraryDoc]) -> None: ... + def variables_changed(sender, variables: list[LibraryDoc]) -> None: ... 
- def _on_libraries_changed(self, sender: ImportsManager, libraries: List[LibraryDoc]) -> None: + def _on_libraries_changed(self, sender: ImportsManager, libraries: list[LibraryDoc]) -> None: self.libraries_changed(self, libraries) - def _on_resources_changed(self, sender: ImportsManager, resources: List[LibraryDoc]) -> None: + def _on_resources_changed(self, sender: ImportsManager, resources: list[LibraryDoc]) -> None: self.resources_changed(self, resources) - def _on_variables_changed(self, sender: ImportsManager, variables: List[LibraryDoc]) -> None: + def _on_variables_changed(self, sender: ImportsManager, variables: list[LibraryDoc]) -> None: self.variables_changed(self, variables) def default_imports_manager(self) -> ImportsManager: @@ -606,7 +902,7 @@ def get_imports_manager(self, document: TextDocument) -> ImportsManager: def get_imports_manager_for_uri(self, uri: Uri) -> ImportsManager: return self.get_imports_manager_for_workspace_folder(self.workspace.get_workspace_folder(uri)) - def get_imports_manager_for_workspace_folder(self, folder: Optional[WorkspaceFolder]) -> ImportsManager: + def get_imports_manager_for_workspace_folder(self, folder: WorkspaceFolder | None) -> ImportsManager: if folder is None: if len(self.workspace.workspace_folders) == 1: folder = self.workspace.workspace_folders[0] diff --git a/packages/robot/src/robotcode/robot/diagnostics/imports_manager.py b/packages/robot/src/robotcode/robot/diagnostics/imports_manager.py index d5df54f6..3b156788 100644 --- a/packages/robot/src/robotcode/robot/diagnostics/imports_manager.py +++ b/packages/robot/src/robotcode/robot/diagnostics/imports_manager.py @@ -498,6 +498,24 @@ def filepath_base(self) -> str: raise ValueError("Cannot determine filepath base.") +@dataclass +class ResourceMetaData: + """Metadata for caching resource LibraryDoc to disk.""" + + meta_version: str + source: str + mtime: int + + @property + def filepath_base(self) -> str: + p = Path(self.source) + return f"{zlib.adler32(str(p.parent).encode('utf-8')):08x}_{p.stem}" + + +# Current version for resource cache invalidation +RESOURCE_META_VERSION = "1" + + class ImportsManager: _logger = LoggingDescriptor() @@ -834,9 +852,7 @@ def __resource_document_changed(self, document: TextDocument) -> None: lib_doc = r_entry.get_libdoc() r_entry.invalidate() - except (SystemExit, KeyboardInterrupt): - raise - except BaseException: + except Exception: result = True if result and lib_doc is not None: @@ -871,7 +887,7 @@ def did_change_watched_files(self, sender: Any, changes: List[FileEvent]) -> Non result = r_entry.check_file_changed(changes) if result is not None: resource_changed.append((r_key, result, lib_doc)) - except BaseException as e: + except Exception as e: self._logger.exception(e) raise @@ -1027,9 +1043,7 @@ def get_library_meta( ) return result, import_name, ignore_arguments - except (SystemExit, KeyboardInterrupt): - raise - except BaseException: + except Exception: pass return None, import_name, ignore_arguments @@ -1096,9 +1110,7 @@ def get_variables_meta( ) return result, import_name - except (SystemExit, KeyboardInterrupt): - raise - except BaseException: + except Exception: pass return None, name @@ -1232,7 +1244,12 @@ def __find_variables_simple( def executor(self) -> ProcessPoolExecutor: with self._executor_lock: if self._executor is None: - self._executor = ProcessPoolExecutor(mp_context=mp.get_context("spawn")) + # Cap at 4 workers to balance parallelism with memory usage + max_workers = min(mp.cpu_count() or 1, 4) + self._executor = 
ProcessPoolExecutor( + max_workers=max_workers, + mp_context=mp.get_context("spawn") + ) return self._executor @@ -1293,38 +1310,30 @@ def _get_library_libdoc( self._logger.exception(e) self._logger.debug(lambda: f"Load library in process {name}{args!r}", context_name="import") - # if self._process_pool_executor is None: - # self._process_pool_executor = ProcessPoolExecutor(max_workers=1, mp_context=mp.get_context("spawn")) - # executor = self._process_pool_executor - executor = ProcessPoolExecutor(max_workers=1, mp_context=mp.get_context("spawn")) try: - try: - result = executor.submit( - get_library_doc, - name, - args if not ignore_arguments else (), - working_dir, - base_dir, - self.get_resolvable_command_line_variables(), - variables, - ).result(self.load_library_timeout) - - except TimeoutError as e: - raise RuntimeError( - f"Loading library {name!r} with args {args!r} (working_dir={working_dir!r}, base_dir={base_dir!r}) " - f"timed out after {self.load_library_timeout} seconds. " - "The library may be slow or blocked during import. " - "If required, increase the timeout by setting the ROBOTCODE_LOAD_LIBRARY_TIMEOUT " - "environment variable." - ) from e - + result = self.executor.submit( + get_library_doc, + name, + args if not ignore_arguments else (), + working_dir, + base_dir, + self.get_resolvable_command_line_variables(), + variables, + ).result(self.load_library_timeout) + + except TimeoutError as e: + raise RuntimeError( + f"Loading library {name!r} with args {args!r} (working_dir={working_dir!r}, base_dir={base_dir!r}) " + f"timed out after {self.load_library_timeout} seconds. " + "The library may be slow or blocked during import. " + "If required, increase the timeout by setting the ROBOTCODE_LOAD_LIBRARY_TIMEOUT " + "environment variable." 
+ ) from e except (SystemExit, KeyboardInterrupt): raise except BaseException as e: self._logger.exception(e) raise - finally: - executor.shutdown(wait=True) try: if meta is not None: @@ -1391,6 +1400,21 @@ def get_libdoc_for_library_import( return entry.get_libdoc() + def _get_resource_meta(self, source: str) -> Optional[ResourceMetaData]: + """Get metadata for a resource file for cache validation.""" + source_path = Path(source) + if not source_path.exists(): + return None + try: + mtime = source_path.stat().st_mtime_ns + except OSError: + return None + return ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source=source, + mtime=mtime, + ) + @_logger.call def get_libdoc_from_model( self, @@ -1399,6 +1423,7 @@ def get_libdoc_from_model( ) -> LibraryDoc: key = source + # Check in-memory cache first entry = None if model in self._resource_libdoc_cache: entry = self._resource_libdoc_cache.get(model, None) @@ -1406,15 +1431,163 @@ def get_libdoc_from_model( if entry and key in entry: return entry[key] + # Check disk cache + meta = self._get_resource_meta(source) + if meta is not None: + meta_file = meta.filepath_base + ".meta" + if self.data_cache.cache_data_exists(CacheSection.RESOURCE, meta_file): + try: + saved_meta = self.data_cache.read_cache_data(CacheSection.RESOURCE, meta_file, ResourceMetaData) + if saved_meta == meta: + spec_path = meta.filepath_base + ".spec" + self._logger.debug( + lambda: f"Use cached resource data for {source}", context_name="import" + ) + result = self.data_cache.read_cache_data(CacheSection.RESOURCE, spec_path, LibraryDoc) + # Store in in-memory cache too + if entry is None: + entry = {} + self._resource_libdoc_cache[model] = entry + entry[key] = result + return result + except Exception: + self._logger.debug( + lambda: f"Failed to load cached resource data for {source}", context_name="import" + ) + + # Cache miss - compute the LibraryDoc result = get_model_doc(model=model, source=source) + + # Store in in-memory cache if entry is None: entry = {} self._resource_libdoc_cache[model] = entry - entry[key] = result + # Save to disk cache + if meta is not None: + try: + meta_file = meta.filepath_base + ".meta" + spec_file = meta.filepath_base + ".spec" + self.data_cache.save_cache_data(CacheSection.RESOURCE, spec_file, result) + self.data_cache.save_cache_data(CacheSection.RESOURCE, meta_file, meta) + except OSError: + self._logger.debug(lambda: f"Failed to save resource cache for {source}", context_name="import") + return result + def get_libdoc_for_source( + self, source: Optional[str], name: Optional[str] = None + ) -> Optional[LibraryDoc]: + """Look up a library LibraryDoc by its source path from disk cache. + + Used when restoring namespace from cache - looks up cached library data. + Returns None if not found (cache miss or no source). 
+ + Args: + source: The source path of the library + name: Optional library name, used to look up standard libraries that + are cached by name instead of source path + """ + if source is None and name is None: + return None + + # Check in-memory cache first by iterating through loaded libraries + with self._libaries_lock: + for entry in self._libaries.values(): + lib_doc = entry.get_libdoc() + if source is not None and lib_doc.source == source: + return lib_doc + if name is not None and lib_doc.name == name: + return lib_doc + + # Try disk cache using library name (for standard libraries) + # Standard libraries are cached with full module paths like "robot/libraries/BuiltIn.spec" + try: + if name is not None: + # First try the name directly (for libraries with dots like "mypackage.MyLibrary") + spec_file = name.replace(".", "/") + ".spec" + if self.data_cache.cache_data_exists(CacheSection.LIBRARY, spec_file): + return self.data_cache.read_cache_data(CacheSection.LIBRARY, spec_file, LibraryDoc) + + # Try standard Robot Framework library path + spec_file = "robot/libraries/" + name + ".spec" + if self.data_cache.cache_data_exists(CacheSection.LIBRARY, spec_file): + return self.data_cache.read_cache_data(CacheSection.LIBRARY, spec_file, LibraryDoc) + + # Try disk cache using source path to build cache key (for by_path libraries) + if source is not None: + source_path = Path(source) + if source_path.exists(): + cache_key = f"{zlib.adler32(str(source_path.parent).encode('utf-8')):08x}_{source_path.stem}" + + spec_file = cache_key + ".spec" + if self.data_cache.cache_data_exists(CacheSection.LIBRARY, spec_file): + return self.data_cache.read_cache_data(CacheSection.LIBRARY, spec_file, LibraryDoc) + except Exception as e: + self._logger.debug( + lambda e=e: f"get_libdoc_for_source failed for source={source}, name={name}: {e}", # type: ignore[misc] + context_name="import", + ) + + return None + + def get_resource_libdoc_for_source(self, source: Optional[str]) -> Optional[LibraryDoc]: + """Look up a resource LibraryDoc by its source path from disk cache. + + Used when restoring namespace from cache - looks up cached resource data. + Returns None if not found (cache miss or no source). + """ + if source is None: + return None + + source_path = Path(source) + if not source_path.exists(): + return None + + # Build cache filename using ResourceMetaData.filepath_base logic + cache_key = f"{zlib.adler32(str(source_path.parent).encode('utf-8')):08x}_{source_path.stem}" + spec_file = cache_key + ".spec" + + if self.data_cache.cache_data_exists(CacheSection.RESOURCE, spec_file): + try: + return self.data_cache.read_cache_data(CacheSection.RESOURCE, spec_file, LibraryDoc) + except (OSError, TypeError, EOFError, AttributeError, ImportError) as e: + self._logger.debug( + lambda e=e: f"get_resource_libdoc_for_source failed for {source}: {e}", # type: ignore[misc] + context_name="import", + ) + + return None + + def get_variables_libdoc_for_source(self, source: Optional[str]) -> Optional[LibraryDoc]: + """Look up a variables LibraryDoc by its source path from disk cache. + + Used when restoring namespace from cache - looks up cached variables data. + Returns None if not found (cache miss or no source). 
+ """ + if source is None: + return None + + source_path = Path(source) + if not source_path.exists(): + return None + + # Build cache filename similar to variables cache logic + cache_key = f"{zlib.adler32(str(source_path.parent).encode('utf-8')):08x}_{source_path.stem}" + spec_file = cache_key + ".spec" + + if self.data_cache.cache_data_exists(CacheSection.VARIABLES, spec_file): + try: + return self.data_cache.read_cache_data(CacheSection.VARIABLES, spec_file, LibraryDoc) + except (OSError, TypeError, EOFError, AttributeError, ImportError) as e: + self._logger.debug( + lambda e=e: f"get_variables_libdoc_for_source failed for {source}: {e}", # type: ignore[misc] + context_name="import", + ) + + return None + def _get_variables_libdoc_handler( self, variables: Optional[Dict[str, Any]] = None, @@ -1474,36 +1647,31 @@ def _get_variables_libdoc( except BaseException as e: self._logger.exception(e) - executor = ProcessPoolExecutor(max_workers=1, mp_context=mp.get_context("spawn")) try: - try: - result = executor.submit( - get_variables_doc, - name, - args, - working_dir, - base_dir, - self.get_resolvable_command_line_variables() if resolve_command_line_vars else None, - variables, - ).result(self.load_library_timeout) - - except TimeoutError as e: - raise RuntimeError( - f"Loading variables {name!r} with args {args!r} (working_dir={working_dir!r}, " - f"base_dir={base_dir!r}) " - f"timed out after {self.load_library_timeout} seconds. " - "The variables may be slow or blocked during import. " - "If required, increase the timeout by setting the ROBOTCODE_LOAD_LIBRARY_TIMEOUT " - "environment variable." - ) from e - + result = self.executor.submit( + get_variables_doc, + name, + args, + working_dir, + base_dir, + self.get_resolvable_command_line_variables() if resolve_command_line_vars else None, + variables, + ).result(self.load_library_timeout) + + except TimeoutError as e: + raise RuntimeError( + f"Loading variables {name!r} with args {args!r} (working_dir={working_dir!r}, " + f"base_dir={base_dir!r}) " + f"timed out after {self.load_library_timeout} seconds. " + "The variables may be slow or blocked during import. " + "If required, increase the timeout by setting the ROBOTCODE_LOAD_LIBRARY_TIMEOUT " + "environment variable." 
+ ) from e except (SystemExit, KeyboardInterrupt): raise except BaseException as e: self._logger.exception(e) raise - finally: - executor.shutdown(True) try: if meta is not None: diff --git a/packages/robot/src/robotcode/robot/diagnostics/namespace.py b/packages/robot/src/robotcode/robot/diagnostics/namespace.py index f6be8fe1..34e216fc 100644 --- a/packages/robot/src/robotcode/robot/diagnostics/namespace.py +++ b/packages/robot/src/robotcode/robot/diagnostics/namespace.py @@ -1,9 +1,13 @@ import ast import enum +import hashlib import itertools +import sys import weakref +import zlib from collections import OrderedDict, defaultdict from concurrent.futures import CancelledError +from dataclasses import dataclass from pathlib import Path from typing import ( Any, @@ -21,13 +25,16 @@ from robot.errors import VariableError from robot.parsing.lexer.tokens import Token -from robot.parsing.model.blocks import Keyword, SettingSection, TestCase, VariableSection +from robot.parsing.model.blocks import ( + Keyword, + SettingSection, + TestCase, + VariableSection, +) from robot.parsing.model.statements import Arguments, Setup, Statement, Timeout from robot.parsing.model.statements import LibraryImport as RobotLibraryImport from robot.parsing.model.statements import ResourceImport as RobotResourceImport -from robot.parsing.model.statements import ( - VariablesImport as RobotVariablesImport, -) +from robot.parsing.model.statements import VariablesImport as RobotVariablesImport from robotcode.core.concurrent import RLock from robotcode.core.event import event from robotcode.core.lsp.types import ( @@ -46,7 +53,7 @@ from robotcode.core.utils.logging import LoggingDescriptor from robotcode.core.utils.path import same_file -from ..utils import get_robot_version +from ..utils import get_robot_version, get_robot_version_str from ..utils.ast import ( get_first_variable_token, range_from_node, @@ -69,6 +76,7 @@ EnvironmentVariableDefinition, GlobalVariableDefinition, Import, + ImportedVariableDefinition, LibraryEntry, LibraryImport, LocalVariableDefinition, @@ -98,6 +106,90 @@ from robot.parsing.model.statements import Var +# Namespace cache version - bump major for incompatible format changes +# 1.0: Single-file cache format with atomic writes (meta + spec in one file) +NAMESPACE_META_VERSION = "1.0" + + +@dataclass(frozen=True) +class NamespaceMetaData: + """Metadata for validating namespace cache (immutable).""" + + meta_version: str + source: str + mtime: int # Source file modification time in nanoseconds + file_size: int # Source file size in bytes + content_hash: str # Tiered hash of file content (size + first 64KB + last 64KB) + library_sources_mtimes: tuple[tuple[str, int], ...] # ((library_source, mtime), ...) + resource_sources_mtimes: tuple[tuple[str, int], ...] # ((resource_source, mtime), ...) + variables_sources_mtimes: tuple[tuple[str, int], ...] # ((variables_source, mtime), ...) + robot_version: str + python_executable: str # sys.executable - detects venv changes + sys_path_hash: str # Hash of sys.path in original order - detects PYTHONPATH/install changes + + @property + def filepath_base(self) -> str: + p = Path(self.source) + return f"{zlib.adler32(str(p.parent).encode('utf-8')):08x}_{p.stem}" + + +@dataclass(frozen=True) +class _CachedEntryBase: + """Common fields for all cached import entries (immutable).""" + + name: str + import_name: str + library_doc_source: str | None # Source path to look up LibraryDoc + args: tuple[Any, ...] 
+ alias: str | None + import_range: Range + import_source: str | None + alias_range: Range + + +@dataclass(frozen=True) +class CachedLibraryEntry(_CachedEntryBase): + """Serializable representation of LibraryEntry for caching (immutable).""" + + +@dataclass(frozen=True) +class CachedResourceEntry(_CachedEntryBase): + """Serializable representation of ResourceEntry for caching (immutable).""" + + imports: tuple[Import, ...] = () + variables: tuple[VariableDefinition, ...] = () + + +@dataclass(frozen=True) +class CachedVariablesEntry(_CachedEntryBase): + """Serializable representation of VariablesEntry for caching (immutable).""" + + variables: tuple["ImportedVariableDefinition", ...] = () + + +@dataclass(frozen=True) +class NamespaceCacheData: + """Serializable namespace state for disk caching (immutable).""" + + # Initialization data + libraries: tuple[tuple[str, CachedLibraryEntry], ...] + resources: tuple[tuple[str, CachedResourceEntry], ...] + resources_files: tuple[tuple[str, str], ...] # ((source, key), ...) + variables_imports: tuple[tuple[str, CachedVariablesEntry], ...] + own_variables: tuple[VariableDefinition, ...] + imports: tuple[Import, ...] + library_doc: LibraryDoc | None + + # Analysis data (cached to skip re-analysis on warm start) + analyzed: bool = False # True if analysis was completed when cache was saved + diagnostics: tuple[Diagnostic, ...] = () + test_case_definitions: tuple[TestCaseDefinition, ...] = () + tag_definitions: tuple[TagDefinition, ...] = () + # Namespace references: ((import_index, (locations...)), ...) + # Maps import index (in imports tuple) to set of locations where the import is referenced + namespace_references: tuple[tuple[int, tuple[Location, ...]], ...] = () + + class DiagnosticsError(Exception): pass @@ -968,6 +1060,472 @@ def get_library_doc(self) -> LibraryDoc: return self._library_doc + @staticmethod + def _compute_content_hash(path: Path) -> tuple[int, str]: + """Compute robust content hash using tiered strategy. + + Returns (file_size, hash_string). 
+ Hash covers: file_size + first_64KB + last_64KB + + This catches: + - Appended content (size change) + - Inserted content (size change) + - Modified content in first 64KB + - Modified content in last 64KB + """ + stat = path.stat() + file_size = stat.st_size + + with open(path, "rb") as f: + first_chunk = f.read(65536) + + # For files > 64KB, also hash last 64KB + if file_size > 65536: + f.seek(max(0, file_size - 65536)) + last_chunk = f.read(65536) + else: + last_chunk = b"" + + # Combine size + both chunks into single hash + hasher = hashlib.sha256() + hasher.update(f"{file_size}:".encode()) + hasher.update(first_chunk) + hasher.update(last_chunk) + + return file_size, hasher.hexdigest() + + def get_cache_metadata(self) -> NamespaceMetaData | None: + """Generate metadata for cache validation.""" + if not self._initialized: + return None + + source_path = Path(self.source) + if not source_path.exists(): + return None + + try: + # Compute content hash for robust validation + file_size, content_hash = self._compute_content_hash(source_path) + except OSError: + self._logger.debug(lambda: f"Failed to compute content hash for {self.source}") + return None + + library_mtimes: list[tuple[str, int]] = [] + for entry in self._libraries.values(): + if entry.library_doc.source: + lib_path = Path(entry.library_doc.source) + if lib_path.exists(): + library_mtimes.append((entry.library_doc.source, lib_path.stat().st_mtime_ns)) + + resource_mtimes: list[tuple[str, int]] = [] + for entry in self._resources.values(): + if entry.library_doc.source: + res_path = Path(entry.library_doc.source) + if res_path.exists(): + resource_mtimes.append((entry.library_doc.source, res_path.stat().st_mtime_ns)) + + variables_mtimes: list[tuple[str, int]] = [] + for entry in self._variables_imports.values(): + if entry.library_doc.source: + var_path = Path(entry.library_doc.source) + if var_path.exists(): + variables_mtimes.append((entry.library_doc.source, var_path.stat().st_mtime_ns)) + + # Compute environment identity - hash sys.path in original order + # Order matters for import resolution (first match wins) + sys_path_hash = hashlib.sha256("\n".join(sys.path).encode("utf-8")).hexdigest()[:16] + + return NamespaceMetaData( + meta_version=NAMESPACE_META_VERSION, + source=self.source, + mtime=source_path.stat().st_mtime_ns, + file_size=file_size, + content_hash=content_hash, + library_sources_mtimes=tuple(library_mtimes), + resource_sources_mtimes=tuple(resource_mtimes), + variables_sources_mtimes=tuple(variables_mtimes), + robot_version=get_robot_version_str(), + python_executable=sys.executable, + sys_path_hash=sys_path_hash, + ) + + def _serialize_namespace_references(self) -> tuple[tuple[int, tuple[Location, ...]], ...]: + """Serialize _namespace_references for caching. + + Maps LibraryEntry keys to import indices (position in _imports list). 
+ """ + if self._namespace_references is None or self._imports is None: + return () + + # Build reverse mapping: LibraryEntry -> import index + entry_to_index: dict[LibraryEntry, int] = {} + for i, imp in enumerate(self._imports): + if imp in self._import_entries: + entry_to_index[self._import_entries[imp]] = i + + # Serialize namespace references using import indices + result: list[tuple[int, tuple[Location, ...]]] = [] + for entry, locations in self._namespace_references.items(): + if entry in entry_to_index: + result.append((entry_to_index[entry], tuple(locations))) + + return tuple(result) + + def to_cache_data(self) -> NamespaceCacheData: + """Extract serializable state for disk caching.""" + # Convert LibraryEntry -> CachedLibraryEntry + cached_libraries: list[tuple[str, CachedLibraryEntry]] = [] + for key, entry in self._libraries.items(): + cached_libraries.append(( + key, + CachedLibraryEntry( + name=entry.name, + import_name=entry.import_name, + library_doc_source=entry.library_doc.source, + args=entry.args, + alias=entry.alias, + import_range=entry.import_range, + import_source=entry.import_source, + alias_range=entry.alias_range, + ), + )) + + # Convert ResourceEntry -> CachedResourceEntry + cached_resources: list[tuple[str, CachedResourceEntry]] = [] + for key, entry in self._resources.items(): + cached_resources.append(( + key, + CachedResourceEntry( + name=entry.name, + import_name=entry.import_name, + library_doc_source=entry.library_doc.source, + args=entry.args, + alias=entry.alias, + import_range=entry.import_range, + import_source=entry.import_source, + alias_range=entry.alias_range, + imports=tuple(entry.imports), + variables=tuple(entry.variables), + ), + )) + + # Build resources_files mapping (source -> key) + resources_files: list[tuple[str, str]] = [] + for source, entry in self._resources_files.items(): + # Find the key in _resources that corresponds to this entry + for key, res_entry in self._resources.items(): + if res_entry is entry: + resources_files.append((source, key)) + break + + # Convert VariablesEntry -> CachedVariablesEntry + cached_variables: list[tuple[str, CachedVariablesEntry]] = [] + for key, entry in self._variables_imports.items(): + cached_variables.append(( + key, + CachedVariablesEntry( + name=entry.name, + import_name=entry.import_name, + library_doc_source=entry.library_doc.source, + args=entry.args, + alias=entry.alias, + import_range=entry.import_range, + import_source=entry.import_source, + alias_range=entry.alias_range, + variables=tuple(entry.variables), + ), + )) + + return NamespaceCacheData( + libraries=tuple(cached_libraries), + resources=tuple(cached_resources), + resources_files=tuple(resources_files), + variables_imports=tuple(cached_variables), + own_variables=tuple(self._own_variables) if self._own_variables is not None else (), + imports=tuple(self._imports) if self._imports is not None else (), + library_doc=self._library_doc, + # Include analysis results if analysis was completed + analyzed=self._analyzed, + diagnostics=tuple(self._diagnostics) if self._diagnostics is not None else (), + test_case_definitions=( + tuple(self._test_case_definitions) if self._test_case_definitions is not None else () + ), + tag_definitions=tuple(self._tag_definitions) if self._tag_definitions is not None else (), + namespace_references=self._serialize_namespace_references(), + ) + + @classmethod + def _restore_libraries_from_cache( + cls, + ns: "Namespace", + cached_libraries: tuple[tuple[str, CachedLibraryEntry], ...], + imports_manager: 
"ImportsManager", + ) -> bool: + """Restore library entries from cache. Returns False if cache is stale.""" + for key, cached_entry in cached_libraries: + library_doc = imports_manager.get_libdoc_for_source( + cached_entry.library_doc_source, cached_entry.name + ) + if library_doc is None: + return False + ns._libraries[key] = LibraryEntry( + name=cached_entry.name, + import_name=cached_entry.import_name, + library_doc=library_doc, + args=cached_entry.args, + alias=cached_entry.alias, + import_range=cached_entry.import_range, + import_source=cached_entry.import_source, + alias_range=cached_entry.alias_range, + ) + return True + + @classmethod + def _restore_resources_from_cache( + cls, + ns: "Namespace", + cached_resources: tuple[tuple[str, CachedResourceEntry], ...], + imports_manager: "ImportsManager", + ) -> bool: + """Restore resource entries from cache. Returns False if cache is stale.""" + for key, cached_entry in cached_resources: + library_doc = imports_manager.get_resource_libdoc_for_source(cached_entry.library_doc_source) + if library_doc is None: + return False + ns._resources[key] = ResourceEntry( + name=cached_entry.name, + import_name=cached_entry.import_name, + library_doc=library_doc, + args=cached_entry.args, + alias=cached_entry.alias, + import_range=cached_entry.import_range, + import_source=cached_entry.import_source, + alias_range=cached_entry.alias_range, + imports=list(cached_entry.imports), + variables=list(cached_entry.variables), + ) + return True + + @classmethod + def _restore_variables_from_cache( + cls, + ns: "Namespace", + cached_variables: tuple[tuple[str, CachedVariablesEntry], ...], + imports_manager: "ImportsManager", + ) -> bool: + """Restore variables entries from cache. Returns False if cache is stale.""" + for key, cached_entry in cached_variables: + library_doc = imports_manager.get_variables_libdoc_for_source(cached_entry.library_doc_source) + if library_doc is None: + return False + ns._variables_imports[key] = VariablesEntry( + name=cached_entry.name, + import_name=cached_entry.import_name, + library_doc=library_doc, + args=cached_entry.args, + alias=cached_entry.alias, + import_range=cached_entry.import_range, + import_source=cached_entry.import_source, + alias_range=cached_entry.alias_range, + variables=list(cached_entry.variables), + ) + return True + + @classmethod + def _match_library_import( + cls, + imp: LibraryImport, + entry: LibraryEntry, + ) -> bool: + """Match a library import to an entry using resolution-based matching. + + Priority order: + 1. Exact alias match (if import has alias) + 2. Exact import_name match + 3. Exact library name match + 4. Source path match (for path-based imports) + + Does NOT use substring matching to avoid false positives like + "MyLib" matching "MyLibExtended". + """ + # 1. Best: alias match (most specific) + if imp.alias and entry.name == imp.alias: + return True + + # 2. Exact import_name match + if imp.name and entry.import_name == imp.name: + return True + + # 3. Exact library name match (case-insensitive for standard libs) + if imp.name and entry.name: + if entry.name == imp.name: + return True + # Case-insensitive match for library names + if entry.name.lower() == imp.name.lower(): + return True + + # 4. 
Source path match for path-based imports + if imp.name and entry.library_doc.source: + # Check if import name ends with the library filename + lib_filename = Path(entry.library_doc.source).stem + imp_path = Path(imp.name) + if imp_path.stem == lib_filename: + return True + # Also check the full library doc name + if entry.library_doc.name and entry.library_doc.name == imp.name: + return True + + return False + + @classmethod + def _rebuild_import_entries(cls, ns: "Namespace") -> None: + """Rebuild _import_entries mapping from restored imports and library/resource/variables entries. + + This is needed after restoring from cache so the analyzer can find namespace references. + The _import_entries dict maps Import objects to their corresponding LibraryEntry. + + Note: When the same library/resource is imported multiple times, each import gets its + own entry in _import_entries (with the same library_doc but different import_range/source). + """ + if ns._imports is None: + return + + for imp in ns._imports: + if isinstance(imp, LibraryImport): + # Find a library entry using resolution-based matching + for entry in ns._libraries.values(): + if cls._match_library_import(imp, entry): + # Create a new entry for this import with the correct range/source + ns._import_entries[imp] = LibraryEntry( + name=entry.name, + import_name=imp.name or "", + library_doc=entry.library_doc, + args=imp.args, + alias=imp.alias, + import_range=imp.range, + import_source=imp.source, + alias_range=imp.alias_range, + ) + break + elif isinstance(imp, ResourceImport): + for entry in ns._resources.values(): + if entry.import_name == imp.name or entry.name == imp.name: + ns._import_entries[imp] = ResourceEntry( + name=entry.name, + import_name=imp.name or "", + library_doc=entry.library_doc, + args=(), + alias=None, + import_range=imp.range, + import_source=imp.source, + imports=entry.imports, + variables=entry.variables, + ) + break + elif isinstance(imp, VariablesImport): + for entry in ns._variables_imports.values(): + if entry.import_name == imp.name or entry.name == imp.name: + ns._import_entries[imp] = VariablesEntry( + name=entry.name, + import_name=imp.name or "", + library_doc=entry.library_doc, + args=imp.args, + alias=None, + import_range=imp.range, + import_source=imp.source, + variables=entry.variables, + ) + break + + @classmethod + def _restore_namespace_references( + cls, + ns: "Namespace", + cached_refs: tuple[tuple[int, tuple[Location, ...]], ...], + ) -> None: + """Restore _namespace_references from cached import indices. + + Maps import indices back to LibraryEntry objects using _import_entries. 
+ """ + if not cached_refs or ns._imports is None: + ns._namespace_references = {} + return + + # Build mapping: import index -> LibraryEntry + index_to_entry: dict[int, LibraryEntry] = {} + for i, imp in enumerate(ns._imports): + if imp in ns._import_entries: + index_to_entry[i] = ns._import_entries[imp] + + # Restore namespace references + ns._namespace_references = {} + for import_idx, locations in cached_refs: + if import_idx in index_to_entry: + entry = index_to_entry[import_idx] + ns._namespace_references[entry] = set(locations) + + @classmethod + def from_cache_data( + cls, + cache_data: NamespaceCacheData, + imports_manager: "ImportsManager", + model: ast.AST, + source: str, + document: TextDocument | None = None, + document_type: "DocumentType | None" = None, + languages: Languages | None = None, + workspace_languages: Languages | None = None, + ) -> "Namespace | None": + """Create a pre-initialized namespace from cached data.""" + # Create namespace instance without initializing + ns = cls( + imports_manager=imports_manager, + model=model, + source=source, + document=document, + document_type=document_type, + languages=languages, + workspace_languages=workspace_languages, + ) + + # Restore libraries + if not cls._restore_libraries_from_cache(ns, cache_data.libraries, imports_manager): + return None + + # Restore resources + if not cls._restore_resources_from_cache(ns, cache_data.resources, imports_manager): + return None + + # Restore resources_files mapping + for src, key in cache_data.resources_files: + if key in ns._resources: + ns._resources_files[src] = ns._resources[key] + + # Restore variables + if not cls._restore_variables_from_cache(ns, cache_data.variables_imports, imports_manager): + return None + + # Restore other state + ns._own_variables = list(cache_data.own_variables) + ns._imports = list(cache_data.imports) + ns._library_doc = cache_data.library_doc + + # Rebuild _import_entries mapping from restored imports and entries + # This is needed for the analyzer to find namespace references + cls._rebuild_import_entries(ns) + + # Mark as initialized + ns._initialized = True + + # Restore cached diagnostics if available (analysis will still run to populate references) + # Note: We don't set _analyzed=True because features like goto definition need + # _variable_references and _keyword_references which aren't cached yet. + # The analysis phase will run but initialization is cached, providing partial speedup. 
+ if cache_data.analyzed and cache_data.diagnostics: + ns._diagnostics = list(cache_data.diagnostics) + + return ns + class DataEntry(NamedTuple): libraries: Dict[str, LibraryEntry] = OrderedDict() resources: Dict[str, ResourceEntry] = OrderedDict() diff --git a/tests/robotcode/language_server/robotframework/parts/test_namespace_caching.py b/tests/robotcode/language_server/robotframework/parts/test_namespace_caching.py new file mode 100644 index 00000000..510b791e --- /dev/null +++ b/tests/robotcode/language_server/robotframework/parts/test_namespace_caching.py @@ -0,0 +1,222 @@ +"""Integration tests for namespace caching functionality.""" + +import pickle +from pathlib import Path + +import pytest + +from robotcode.language_server.robotframework.protocol import ( + RobotLanguageServerProtocol, +) + +# Cache directory relative to the test data root +DATA_ROOT = Path(__file__).parent / "data" +CACHE_DIR = DATA_ROOT / ".robotcode_cache" + + +class TestNamespaceCaching: + """Integration tests for namespace cache behavior.""" + + def test_cache_directory_created_after_analysis( + self, + protocol: RobotLanguageServerProtocol, + ) -> None: + """Cache directory is created after workspace analysis.""" + # The protocol fixture already runs workspace diagnostics + # which should create the cache directory + assert CACHE_DIR.exists(), "Cache directory should be created" + + def test_namespace_cache_files_created( + self, + protocol: RobotLanguageServerProtocol, + ) -> None: + """Namespace cache files are created for analyzed robot files.""" + # Look for namespace cache files + ns_cache_dirs = list(CACHE_DIR.glob("*/*/namespace")) + + assert len(ns_cache_dirs) > 0, "Should have namespace cache directories" + + # Check for cache files (either .cache.pkl single-file or legacy .meta.pkl/.spec.pkl) + cache_files: list[Path] = [] + for ns_dir in ns_cache_dirs: + cache_files.extend(ns_dir.glob("*.cache.pkl")) + cache_files.extend(ns_dir.glob("*.meta.pkl")) + + assert len(cache_files) > 0, "Should have namespace cache files" + + def test_cache_file_contains_valid_data( + self, + protocol: RobotLanguageServerProtocol, + ) -> None: + """Cache files contain valid pickled metadata and spec data.""" + ns_cache_dirs = list(CACHE_DIR.glob("*/*/namespace")) + if not ns_cache_dirs: + pytest.skip("No namespace cache directory found") + + # Find a cache file + cache_files = list(ns_cache_dirs[0].glob("*.cache.pkl")) + if not cache_files: + pytest.skip("No cache files found") + + # Verify it's valid pickle with expected structure + with open(cache_files[0], "rb") as f: + data = pickle.load(f) + + # Single-file format stores (meta, spec) tuple + assert isinstance(data, tuple), "Cache should be a tuple" + assert len(data) == 2, "Cache should have (meta, spec)" + + meta, _spec = data + # Verify metadata has required fields + assert hasattr(meta, "source"), "Meta should have source" + assert hasattr(meta, "mtime"), "Meta should have mtime" + assert hasattr(meta, "content_hash"), "Meta should have content_hash" + + def test_cache_metadata_tracks_environment( + self, + protocol: RobotLanguageServerProtocol, + ) -> None: + """Cache metadata includes Python environment tracking fields.""" + ns_cache_dirs = list(CACHE_DIR.glob("*/*/namespace")) + if not ns_cache_dirs: + pytest.skip("No namespace cache directory found") + + cache_files = list(ns_cache_dirs[0].glob("*.cache.pkl")) + if not cache_files: + pytest.skip("No cache files found") + + with open(cache_files[0], "rb") as f: + meta, _spec = pickle.load(f) + + # Environment 
tracking fields (for detecting venv changes) + assert hasattr(meta, "python_executable"), "Should track python_executable" + assert hasattr(meta, "sys_path_hash"), "Should track sys_path_hash" + assert hasattr(meta, "robot_version"), "Should track robot_version" + + def test_corrupt_cache_does_not_crash( + self, + protocol: RobotLanguageServerProtocol, + ) -> None: + """Corrupted cache files are handled gracefully without crashing.""" + ns_cache_dirs = list(CACHE_DIR.glob("*/*/namespace")) + if not ns_cache_dirs: + pytest.skip("No namespace cache directory found") + + # Create a corrupt cache file + corrupt_file = ns_cache_dirs[0] / "corrupt_test.cache.pkl" + corrupt_file.write_bytes(b"NOT VALID PICKLE DATA") + + try: + # Access a document - should not crash despite corrupt cache + test_file = DATA_ROOT / "tests" / "hover.robot" + if test_file.exists(): + doc = protocol.documents.get_or_open_document(test_file, "robotframework") + # Try to get namespace (triggers cache lookup) + ns = protocol.documents_cache.get_namespace(doc) + assert ns is not None, "Should get namespace despite corrupt sibling cache" + finally: + # Cleanup + if corrupt_file.exists(): + corrupt_file.unlink() + + def test_different_files_have_different_cache_keys( + self, + protocol: RobotLanguageServerProtocol, + ) -> None: + """Files in different directories have unique cache keys (no collisions).""" + ns_cache_dirs = list(CACHE_DIR.glob("*/*/namespace")) + if not ns_cache_dirs: + pytest.skip("No namespace cache directory found") + + # Collect all cache file names + cache_files: list[Path] = [] + for ns_dir in ns_cache_dirs: + cache_files.extend(ns_dir.glob("*.cache.pkl")) + + if len(cache_files) < 2: + pytest.skip("Need at least 2 cache files to test uniqueness") + + # All cache file names should be unique + names = [f.name for f in cache_files] + assert len(names) == len(set(names)), "Cache file names should be unique" + + +class TestCacheInvalidation: + """Tests for cache invalidation behavior.""" + + def test_namespace_available_for_document( + self, + protocol: RobotLanguageServerProtocol, + ) -> None: + """Namespace is available for documents after analysis.""" + test_file = DATA_ROOT / "tests" / "hover.robot" + if not test_file.exists(): + pytest.skip("Test file not found") + + doc = protocol.documents.get_or_open_document(test_file, "robotframework") + ns = protocol.documents_cache.get_namespace(doc) + + assert ns is not None, "Should have namespace for document" + assert ns.source is not None, "Namespace should have source" + + def test_namespace_has_source_and_imports( + self, + protocol: RobotLanguageServerProtocol, + ) -> None: + """Namespace contains source path and import information.""" + test_file = DATA_ROOT / "tests" / "hover.robot" + if not test_file.exists(): + pytest.skip("Test file not found") + + doc = protocol.documents.get_or_open_document(test_file, "robotframework") + ns = protocol.documents_cache.get_namespace(doc) + + assert ns is not None + assert ns.source is not None, "Namespace should have source path" + # Namespace should have libraries (at least BuiltIn is implicit) + assert hasattr(ns, "get_libraries"), "Namespace should support get_libraries" + + +class TestLibraryDocCaching: + """Tests for library documentation caching.""" + + def test_libdoc_cache_directory_exists( + self, + protocol: RobotLanguageServerProtocol, + ) -> None: + """Library documentation cache directory is created.""" + libdoc_dirs = list(CACHE_DIR.glob("*/*/libdoc")) + + # After analyzing files that import 
libraries, should have libdoc cache + assert len(libdoc_dirs) > 0, "Should have libdoc cache directories" + + def test_libdoc_cache_files_exist( + self, + protocol: RobotLanguageServerProtocol, + ) -> None: + """Library documentation cache contains pickle files.""" + libdoc_dirs = list(CACHE_DIR.glob("*/*/libdoc")) + if not libdoc_dirs: + pytest.skip("No libdoc cache directory found") + + cache_files: list[Path] = [] + for libdoc_dir in libdoc_dirs: + cache_files.extend(libdoc_dir.glob("*.pkl")) + + assert len(cache_files) > 0, "Should have libdoc cache files" + + def test_builtin_library_is_cached( + self, + protocol: RobotLanguageServerProtocol, + ) -> None: + """BuiltIn library documentation is cached.""" + libdoc_dirs = list(CACHE_DIR.glob("*/*/libdoc")) + if not libdoc_dirs: + pytest.skip("No libdoc cache directory found") + + # Look for BuiltIn library cache (may be in subdirectory like robot/libraries/) + builtin_files: list[Path] = [] + for libdoc_dir in libdoc_dirs: + builtin_files.extend(libdoc_dir.glob("**/*BuiltIn*")) + + assert len(builtin_files) > 0, "BuiltIn library should be cached" diff --git a/tests/robotcode/robot/diagnostics/test_data_cache.py b/tests/robotcode/robot/diagnostics/test_data_cache.py new file mode 100644 index 00000000..827ab365 --- /dev/null +++ b/tests/robotcode/robot/diagnostics/test_data_cache.py @@ -0,0 +1,257 @@ +"""Unit tests for data_cache.py - cache implementations.""" + +import pickle +from dataclasses import dataclass +from pathlib import Path + +import pytest + +from robotcode.robot.diagnostics.data_cache import ( + CacheSection, + JsonDataCache, + PickleDataCache, +) + + +@dataclass +class SampleData: + """Sample dataclass for testing serialization.""" + + name: str + value: int + + +class TestCacheSection: + """Tests for CacheSection enum.""" + + def test_cache_section_values(self) -> None: + """Verify CacheSection enum has expected values.""" + assert CacheSection.LIBRARY.value == "libdoc" + assert CacheSection.VARIABLES.value == "variables" + assert CacheSection.RESOURCE.value == "resource" + assert CacheSection.NAMESPACE.value == "namespace" + + +class TestPickleDataCache: + """Tests for PickleDataCache implementation.""" + + def test_init_creates_cache_directory(self, tmp_path: Path) -> None: + """Cache directory is created on initialization.""" + cache_dir = tmp_path / "cache" + assert not cache_dir.exists() + + PickleDataCache(cache_dir) + + assert cache_dir.exists() + assert (cache_dir / ".gitignore").exists() + + def test_init_with_existing_directory(self, tmp_path: Path) -> None: + """Initialization works with existing directory.""" + cache_dir = tmp_path / "cache" + cache_dir.mkdir(parents=True) + + cache = PickleDataCache(cache_dir) + + assert cache.cache_dir == cache_dir + + def test_build_cache_data_filename(self, tmp_path: Path) -> None: + """Filename is built correctly with section and entry name.""" + cache = PickleDataCache(tmp_path) + + path = cache.build_cache_data_filename(CacheSection.LIBRARY, "test_entry") + + assert path == tmp_path / "libdoc" / "test_entry.pkl" + + def test_cache_data_exists_returns_false_for_missing(self, tmp_path: Path) -> None: + """cache_data_exists returns False when file doesn't exist.""" + cache = PickleDataCache(tmp_path) + + assert cache.cache_data_exists(CacheSection.LIBRARY, "nonexistent") is False + + def test_cache_data_exists_returns_true_for_existing(self, tmp_path: Path) -> None: + """cache_data_exists returns True when file exists.""" + cache = PickleDataCache(tmp_path) + 
cache.save_cache_data(CacheSection.LIBRARY, "test", {"key": "value"}) + + assert cache.cache_data_exists(CacheSection.LIBRARY, "test") is True + + def test_save_and_read_cache_data_dict(self, tmp_path: Path) -> None: + """Save and read dictionary data correctly.""" + cache = PickleDataCache(tmp_path) + data = {"name": "test", "values": [1, 2, 3]} + + cache.save_cache_data(CacheSection.LIBRARY, "test", data) + result = cache.read_cache_data(CacheSection.LIBRARY, "test", dict) + + assert result == data + + def test_save_and_read_cache_data_dataclass(self, tmp_path: Path) -> None: + """Save and read dataclass correctly.""" + cache = PickleDataCache(tmp_path) + data = SampleData(name="test", value=42) + + cache.save_cache_data(CacheSection.NAMESPACE, "sample", data) + result = cache.read_cache_data(CacheSection.NAMESPACE, "sample", SampleData) + + assert result == data + + def test_read_cache_data_type_mismatch_raises_typeerror(self, tmp_path: Path) -> None: + """TypeError is raised when cached data doesn't match expected type.""" + cache = PickleDataCache(tmp_path) + cache.save_cache_data(CacheSection.LIBRARY, "test", {"key": "value"}) + + with pytest.raises(TypeError, match=r"Expected.*str.*got.*dict"): + cache.read_cache_data(CacheSection.LIBRARY, "test", str) + + def test_read_cache_data_accepts_tuple_of_types(self, tmp_path: Path) -> None: + """read_cache_data accepts a tuple of types for validation.""" + cache = PickleDataCache(tmp_path) + cache.save_cache_data(CacheSection.LIBRARY, "test", {"key": "value"}) + + result = cache.read_cache_data(CacheSection.LIBRARY, "test", (dict, list)) + + assert result == {"key": "value"} + + def test_read_cache_data_missing_file_raises_error(self, tmp_path: Path) -> None: + """FileNotFoundError is raised when cache file doesn't exist.""" + cache = PickleDataCache(tmp_path) + + with pytest.raises(FileNotFoundError): + cache.read_cache_data(CacheSection.LIBRARY, "nonexistent", dict) + + def test_save_creates_section_directory(self, tmp_path: Path) -> None: + """Section subdirectory is created when saving.""" + cache = PickleDataCache(tmp_path) + + cache.save_cache_data(CacheSection.VARIABLES, "test", {"data": 1}) + + assert (tmp_path / "variables").is_dir() + + def test_save_overwrites_existing_file(self, tmp_path: Path) -> None: + """Existing cache file is overwritten on save.""" + cache = PickleDataCache(tmp_path) + cache.save_cache_data(CacheSection.LIBRARY, "test", {"version": 1}) + cache.save_cache_data(CacheSection.LIBRARY, "test", {"version": 2}) + + result = cache.read_cache_data(CacheSection.LIBRARY, "test", dict) + + assert result == {"version": 2} + + def test_atomic_write_no_temp_files_left(self, tmp_path: Path) -> None: + """No temporary files are left after successful save.""" + cache = PickleDataCache(tmp_path) + cache.save_cache_data(CacheSection.LIBRARY, "test", {"data": 1}) + + section_dir = tmp_path / "libdoc" + files = list(section_dir.iterdir()) + + assert len(files) == 1 + assert files[0].suffix == ".pkl" + + def test_read_corrupt_pickle_raises_error(self, tmp_path: Path) -> None: + """UnpicklingError is raised when pickle data is corrupt.""" + cache = PickleDataCache(tmp_path) + cache_file = cache.build_cache_data_filename(CacheSection.LIBRARY, "corrupt") + cache_file.parent.mkdir(parents=True, exist_ok=True) + cache_file.write_bytes(b"not valid pickle data") + + with pytest.raises((pickle.UnpicklingError, EOFError)): + cache.read_cache_data(CacheSection.LIBRARY, "corrupt", dict) + + def test_different_sections_are_isolated(self, 
tmp_path: Path) -> None: + """Data in different sections doesn't interfere.""" + cache = PickleDataCache(tmp_path) + cache.save_cache_data(CacheSection.LIBRARY, "same_name", {"section": "library"}) + cache.save_cache_data(CacheSection.RESOURCE, "same_name", {"section": "resource"}) + + lib_data = cache.read_cache_data(CacheSection.LIBRARY, "same_name", dict) + res_data = cache.read_cache_data(CacheSection.RESOURCE, "same_name", dict) + + assert lib_data["section"] == "library" + assert res_data["section"] == "resource" + + +class TestJsonDataCache: + """Tests for JsonDataCache implementation.""" + + def test_build_cache_data_filename(self, tmp_path: Path) -> None: + """Filename uses .json extension.""" + cache = JsonDataCache(tmp_path) + + path = cache.build_cache_data_filename(CacheSection.LIBRARY, "test_entry") + + assert path == tmp_path / "libdoc" / "test_entry.json" + + def test_cache_data_exists(self, tmp_path: Path) -> None: + """cache_data_exists works for JSON cache.""" + cache = JsonDataCache(tmp_path) + + assert cache.cache_data_exists(CacheSection.LIBRARY, "test") is False + + cache.save_cache_data(CacheSection.LIBRARY, "test", {"key": "value"}) + + assert cache.cache_data_exists(CacheSection.LIBRARY, "test") is True + + def test_save_and_read_cache_data(self, tmp_path: Path) -> None: + """Save and read JSON data correctly.""" + cache = JsonDataCache(tmp_path) + data = {"name": "test", "values": [1, 2, 3]} + + cache.save_cache_data(CacheSection.LIBRARY, "test", data) + result = cache.read_cache_data(CacheSection.LIBRARY, "test", dict) + + assert result == data + + +class TestCacheEdgeCases: + """Edge case tests for cache implementations.""" + + @pytest.mark.parametrize( + "entry_name", + [ + "simple", + "with_underscore", + "with-dash", + "with.dots", + "nested/path/entry", + "unicode_日本語", + ], + ) + def test_various_entry_names(self, tmp_path: Path, entry_name: str) -> None: + """Cache handles various entry name formats.""" + cache = PickleDataCache(tmp_path) + data = {"entry": entry_name} + + cache.save_cache_data(CacheSection.LIBRARY, entry_name, data) + result = cache.read_cache_data(CacheSection.LIBRARY, entry_name, dict) + + assert result == data + + def test_large_data(self, tmp_path: Path) -> None: + """Cache handles large data objects.""" + cache = PickleDataCache(tmp_path) + # Create ~1MB of data + data = {"items": list(range(100000)), "text": "x" * 500000} + + cache.save_cache_data(CacheSection.NAMESPACE, "large", data) + result = cache.read_cache_data(CacheSection.NAMESPACE, "large", dict) + + assert result == data + + def test_none_value(self, tmp_path: Path) -> None: + """Cache handles None values.""" + cache = PickleDataCache(tmp_path) + + cache.save_cache_data(CacheSection.LIBRARY, "none_test", None) + result = cache.read_cache_data(CacheSection.LIBRARY, "none_test", type(None)) + + assert result is None + + def test_empty_dict(self, tmp_path: Path) -> None: + """Cache handles empty dictionaries.""" + cache = PickleDataCache(tmp_path) + + cache.save_cache_data(CacheSection.LIBRARY, "empty", {}) + result = cache.read_cache_data(CacheSection.LIBRARY, "empty", dict) + + assert result == {} diff --git a/tests/robotcode/robot/diagnostics/test_imports_manager_cache.py b/tests/robotcode/robot/diagnostics/test_imports_manager_cache.py new file mode 100644 index 00000000..82de06cc --- /dev/null +++ b/tests/robotcode/robot/diagnostics/test_imports_manager_cache.py @@ -0,0 +1,180 @@ +"""Unit tests for imports_manager cache functionality.""" + +import zlib +from pathlib 
import Path + +import pytest + +from robotcode.robot.diagnostics.imports_manager import ( + RESOURCE_META_VERSION, + ResourceMetaData, +) + + +class TestResourceMetaData: + """Tests for ResourceMetaData dataclass.""" + + def test_create_metadata(self) -> None: + """ResourceMetaData can be created with all required fields.""" + meta = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/path/to/resource.resource", + mtime=1234567890123456789, + ) + + assert meta.meta_version == RESOURCE_META_VERSION + assert meta.source == "/path/to/resource.resource" + assert meta.mtime == 1234567890123456789 + + def test_filepath_base_property(self) -> None: + """filepath_base computes correct cache filename base.""" + meta = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/home/user/project/resources/common.resource", + mtime=0, + ) + + # Should be "adler32hash_stem" format + expected_hash = f"{zlib.adler32(b'/home/user/project/resources'):08x}" + assert meta.filepath_base == f"{expected_hash}_common" + + def test_filepath_base_different_paths(self) -> None: + """filepath_base generates unique hashes for different parent directories.""" + meta1 = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/path/a/resource.resource", + mtime=0, + ) + meta2 = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/path/b/resource.resource", + mtime=0, + ) + + # Different parent dirs should produce different hashes + assert meta1.filepath_base != meta2.filepath_base + # But both end with the same stem + assert meta1.filepath_base.endswith("_resource") + assert meta2.filepath_base.endswith("_resource") + + def test_filepath_base_same_name_different_dirs(self) -> None: + """Same filename in different directories produces different cache keys.""" + meta1 = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/project/tests/keywords.resource", + mtime=0, + ) + meta2 = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/project/lib/keywords.resource", + mtime=0, + ) + + assert meta1.filepath_base != meta2.filepath_base + + def test_metadata_equality(self) -> None: + """ResourceMetaData instances are equal when all fields match.""" + meta1 = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/path/to/resource.resource", + mtime=12345, + ) + meta2 = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/path/to/resource.resource", + mtime=12345, + ) + + assert meta1 == meta2 + + def test_metadata_inequality_different_mtime(self) -> None: + """ResourceMetaData instances differ when mtime differs.""" + meta1 = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/path/to/resource.resource", + mtime=12345, + ) + meta2 = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/path/to/resource.resource", + mtime=67890, + ) + + assert meta1 != meta2 + + def test_metadata_inequality_different_source(self) -> None: + """ResourceMetaData instances differ when source differs.""" + meta1 = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/path/a/resource.resource", + mtime=12345, + ) + meta2 = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/path/b/resource.resource", + mtime=12345, + ) + + assert meta1 != meta2 + + +class TestResourceMetaVersion: + """Tests for RESOURCE_META_VERSION constant.""" + + def test_meta_version_is_string(self) -> None: + """Meta version is a string.""" + assert isinstance(RESOURCE_META_VERSION, str) + assert len(RESOURCE_META_VERSION) > 
0 + + def test_meta_version_value(self) -> None: + """Meta version has expected value.""" + assert RESOURCE_META_VERSION == "1" + + +class TestCacheKeyGeneration: + """Tests for cache key generation patterns.""" + + @pytest.mark.parametrize( + ("source", "expected_stem"), + [ + ("/path/to/test.resource", "_test"), + ("/path/to/common_keywords.resource", "_common_keywords"), + ("/path/to/My-Library.resource", "_My-Library"), + ("/path/日本語/テスト.resource", "_テスト"), + ], + ) + def test_cache_key_stem_extraction(self, source: str, expected_stem: str) -> None: + """Cache key correctly extracts filename stem.""" + meta = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source=source, + mtime=0, + ) + + assert meta.filepath_base.endswith(expected_stem) + + def test_cache_key_uses_adler32(self) -> None: + """Cache key uses zlib.adler32 for parent directory hash.""" + source = "/specific/path/to/resource.resource" + meta = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source=source, + mtime=0, + ) + + parent_path = str(Path(source).parent) + expected_hash = f"{zlib.adler32(parent_path.encode('utf-8')):08x}" + + assert meta.filepath_base.startswith(expected_hash) + + def test_cache_key_hash_length(self) -> None: + """Cache key hash portion is 8 hex characters (adler32).""" + meta = ResourceMetaData( + meta_version=RESOURCE_META_VERSION, + source="/any/path/file.resource", + mtime=0, + ) + + hash_part = meta.filepath_base.split("_")[0] + assert len(hash_part) == 8 + assert all(c in "0123456789abcdef" for c in hash_part) diff --git a/tests/robotcode/robot/diagnostics/test_namespace_cache.py b/tests/robotcode/robot/diagnostics/test_namespace_cache.py new file mode 100644 index 00000000..53a1d911 --- /dev/null +++ b/tests/robotcode/robot/diagnostics/test_namespace_cache.py @@ -0,0 +1,370 @@ +"""Unit tests for namespace caching data classes and serialization.""" + +import hashlib +import zlib +from pathlib import Path + +import pytest + +from robotcode.core.lsp.types import Position, Range +from robotcode.robot.diagnostics.namespace import ( + NAMESPACE_META_VERSION, + CachedLibraryEntry, + CachedResourceEntry, + CachedVariablesEntry, + Namespace, + NamespaceCacheData, + NamespaceMetaData, +) + + +class TestNamespaceMetaData: + """Tests for NamespaceMetaData dataclass.""" + + def test_create_metadata(self) -> None: + """NamespaceMetaData can be created with all required fields.""" + meta = NamespaceMetaData( + meta_version=NAMESPACE_META_VERSION, + source="/path/to/test.robot", + mtime=1234567890123456789, + file_size=1024, + content_hash="abc123", + library_sources_mtimes=(("/path/lib.py", 111),), + resource_sources_mtimes=(("/path/res.resource", 222),), + variables_sources_mtimes=(("/path/vars.py", 333),), + robot_version="7.0", + python_executable="/usr/bin/python3", + sys_path_hash="def456", + ) + + assert meta.meta_version == NAMESPACE_META_VERSION + assert meta.source == "/path/to/test.robot" + assert meta.mtime == 1234567890123456789 + assert meta.file_size == 1024 + assert meta.content_hash == "abc123" + + def test_metadata_is_frozen(self) -> None: + """NamespaceMetaData is immutable.""" + meta = NamespaceMetaData( + meta_version=NAMESPACE_META_VERSION, + source="/path/to/test.robot", + mtime=1234567890, + file_size=100, + content_hash="abc", + library_sources_mtimes=(), + resource_sources_mtimes=(), + variables_sources_mtimes=(), + robot_version="7.0", + python_executable="/usr/bin/python3", + sys_path_hash="def", + ) + + with pytest.raises(AttributeError): + meta.source = 
"/other/path" # type: ignore[misc] + + def test_filepath_base_property(self) -> None: + """filepath_base computes correct cache filename base.""" + meta = NamespaceMetaData( + meta_version=NAMESPACE_META_VERSION, + source="/home/user/project/tests/test_example.robot", + mtime=1234567890, + file_size=100, + content_hash="abc", + library_sources_mtimes=(), + resource_sources_mtimes=(), + variables_sources_mtimes=(), + robot_version="7.0", + python_executable="/usr/bin/python3", + sys_path_hash="def", + ) + + # Should be "adler32hash_stem" format + expected_hash = f"{zlib.adler32(b'/home/user/project/tests'):08x}" + assert meta.filepath_base == f"{expected_hash}_test_example" + + def test_filepath_base_with_different_paths(self) -> None: + """filepath_base generates unique hashes for different parent directories.""" + meta1 = NamespaceMetaData( + meta_version=NAMESPACE_META_VERSION, + source="/path/a/test.robot", + mtime=0, + file_size=0, + content_hash="", + library_sources_mtimes=(), + resource_sources_mtimes=(), + variables_sources_mtimes=(), + robot_version="7.0", + python_executable="", + sys_path_hash="", + ) + meta2 = NamespaceMetaData( + meta_version=NAMESPACE_META_VERSION, + source="/path/b/test.robot", + mtime=0, + file_size=0, + content_hash="", + library_sources_mtimes=(), + resource_sources_mtimes=(), + variables_sources_mtimes=(), + robot_version="7.0", + python_executable="", + sys_path_hash="", + ) + + # Different parent dirs should produce different hashes + assert meta1.filepath_base != meta2.filepath_base + # But both end with the same stem + assert meta1.filepath_base.endswith("_test") + assert meta2.filepath_base.endswith("_test") + + +class TestCachedEntryClasses: + """Tests for cached entry dataclasses.""" + + def test_cached_library_entry(self) -> None: + """CachedLibraryEntry can be created with all fields.""" + entry = CachedLibraryEntry( + name="Collections", + import_name="Collections", + library_doc_source="/path/to/collections.py", + args=(), + alias=None, + import_range=Range(start=Position(line=0, character=0), end=Position(line=0, character=11)), + import_source="/test.robot", + alias_range=Range.zero(), + ) + + assert entry.name == "Collections" + assert entry.import_name == "Collections" + assert entry.library_doc_source == "/path/to/collections.py" + + def test_cached_library_entry_with_alias(self) -> None: + """CachedLibraryEntry supports alias.""" + entry = CachedLibraryEntry( + name="MyAlias", + import_name="SomeLibrary", + library_doc_source="/path/to/lib.py", + args=("arg1", "arg2"), + alias="MyAlias", + import_range=Range.zero(), + import_source="/test.robot", + alias_range=Range(start=Position(line=0, character=20), end=Position(line=0, character=27)), + ) + + assert entry.alias == "MyAlias" + assert entry.args == ("arg1", "arg2") + + def test_cached_resource_entry(self) -> None: + """CachedResourceEntry includes imports and variables.""" + entry = CachedResourceEntry( + name="common", + import_name="resources/common.resource", + library_doc_source="/project/resources/common.resource", + args=(), + alias=None, + import_range=Range.zero(), + import_source="/test.robot", + alias_range=Range.zero(), + imports=(), + variables=(), + ) + + assert entry.name == "common" + assert entry.imports == () + assert entry.variables == () + + def test_cached_variables_entry(self) -> None: + """CachedVariablesEntry includes variables.""" + entry = CachedVariablesEntry( + name="vars", + import_name="variables.py", + library_doc_source="/project/variables.py", + 
args=(), + alias=None, + import_range=Range.zero(), + import_source="/test.robot", + alias_range=Range.zero(), + variables=(), + ) + + assert entry.name == "vars" + assert entry.variables == () + + def test_cached_entries_are_frozen(self) -> None: + """All cached entry types are immutable.""" + lib_entry = CachedLibraryEntry( + name="Test", + import_name="Test", + library_doc_source=None, + args=(), + alias=None, + import_range=Range.zero(), + import_source=None, + alias_range=Range.zero(), + ) + + with pytest.raises(AttributeError): + lib_entry.name = "Modified" # type: ignore[misc] + + +class TestNamespaceCacheData: + """Tests for NamespaceCacheData dataclass.""" + + def test_create_minimal_cache_data(self) -> None: + """NamespaceCacheData can be created with minimal data.""" + cache_data = NamespaceCacheData( + libraries=(), + resources=(), + resources_files=(), + variables_imports=(), + own_variables=(), + imports=(), + library_doc=None, + ) + + assert cache_data.libraries == () + assert cache_data.analyzed is False + assert cache_data.diagnostics == () + + def test_cache_data_with_analysis_results(self) -> None: + """NamespaceCacheData includes analysis data when analyzed=True.""" + cache_data = NamespaceCacheData( + libraries=(), + resources=(), + resources_files=(), + variables_imports=(), + own_variables=(), + imports=(), + library_doc=None, + analyzed=True, + diagnostics=(), + test_case_definitions=(), + tag_definitions=(), + namespace_references=(), + ) + + assert cache_data.analyzed is True + + def test_cache_data_is_frozen(self) -> None: + """NamespaceCacheData is immutable.""" + cache_data = NamespaceCacheData( + libraries=(), + resources=(), + resources_files=(), + variables_imports=(), + own_variables=(), + imports=(), + library_doc=None, + ) + + with pytest.raises(AttributeError): + cache_data.analyzed = True # type: ignore[misc] + + +class TestComputeContentHash: + """Tests for Namespace._compute_content_hash static method.""" + + def test_compute_hash_small_file(self, tmp_path: Path) -> None: + """Content hash is computed for small files (< 64KB).""" + test_file = tmp_path / "small.robot" + content = b"*** Test Cases ***\nTest\n Log Hello" + test_file.write_bytes(content) + + file_size, content_hash = Namespace._compute_content_hash(test_file) + + assert file_size == len(content) + assert len(content_hash) == 64 # SHA256 hex digest length + assert content_hash == hashlib.sha256(f"{len(content)}:".encode() + content).hexdigest() + + def test_compute_hash_large_file(self, tmp_path: Path) -> None: + """Content hash includes first and last 64KB for large files.""" + test_file = tmp_path / "large.robot" + # Create file > 64KB: 100KB of content + first_part = b"A" * 65536 + middle_part = b"B" * 20000 + last_part = b"C" * 65536 + content = first_part + middle_part + last_part + test_file.write_bytes(content) + + file_size, content_hash = Namespace._compute_content_hash(test_file) + + assert file_size == len(content) + # Verify hash includes size + first 64KB + last 64KB + expected_hasher = hashlib.sha256() + expected_hasher.update(f"{len(content)}:".encode()) + expected_hasher.update(first_part) + expected_hasher.update(content[-65536:]) # Last 64KB + assert content_hash == expected_hasher.hexdigest() + + def test_different_content_different_hash(self, tmp_path: Path) -> None: + """Different file content produces different hashes.""" + file1 = tmp_path / "file1.robot" + file2 = tmp_path / "file2.robot" + file1.write_bytes(b"Content A") + file2.write_bytes(b"Content B") + + _, 
hash1 = Namespace._compute_content_hash(file1)
+        _, hash2 = Namespace._compute_content_hash(file2)
+
+        assert hash1 != hash2
+
+    def test_same_content_same_hash(self, tmp_path: Path) -> None:
+        """Same file content produces same hash."""
+        file1 = tmp_path / "file1.robot"
+        file2 = tmp_path / "file2.robot"
+        content = b"Same content in both files"
+        file1.write_bytes(content)
+        file2.write_bytes(content)
+
+        _, hash1 = Namespace._compute_content_hash(file1)
+        _, hash2 = Namespace._compute_content_hash(file2)
+
+        assert hash1 == hash2
+
+    def test_append_detection(self, tmp_path: Path) -> None:
+        """Hash detects appended content (size change)."""
+        test_file = tmp_path / "test.robot"
+        original = b"Original content"
+        test_file.write_bytes(original)
+        size1, hash1 = Namespace._compute_content_hash(test_file)
+
+        # Append content
+        test_file.write_bytes(original + b"\nAppended line")
+        size2, hash2 = Namespace._compute_content_hash(test_file)
+
+        assert size2 > size1
+        assert hash1 != hash2
+
+    def test_modification_detection(self, tmp_path: Path) -> None:
+        """Hash detects in-place modification (same size, different content)."""
+        test_file = tmp_path / "test.robot"
+        test_file.write_bytes(b"Original content here")
+        _, hash1 = Namespace._compute_content_hash(test_file)
+
+        # Modify without changing size
+        test_file.write_bytes(b"Modified content here")
+        _, hash2 = Namespace._compute_content_hash(test_file)
+
+        assert hash1 != hash2
+
+    def test_empty_file(self, tmp_path: Path) -> None:
+        """Hash handles empty files."""
+        test_file = tmp_path / "empty.robot"
+        test_file.write_bytes(b"")
+
+        file_size, content_hash = Namespace._compute_content_hash(test_file)
+
+        assert file_size == 0
+        assert len(content_hash) == 64
+
+
+class TestMetaVersion:
+    """Tests for namespace meta version constant."""
+
+    def test_meta_version_format(self) -> None:
+        """Meta version is a valid version string."""
+        assert NAMESPACE_META_VERSION == "1.0"
+        # Verify it can be parsed as a version
+        parts = NAMESPACE_META_VERSION.split(".")
+        assert len(parts) == 2
+        assert all(part.isdigit() for part in parts)

From 59e0314f9d672d437999dce48e6d0e1189d155ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20Lepp=C3=A4nen?=
Date: Thu, 15 Jan 2026 17:50:09 +0200
Subject: [PATCH 2/2] perf(language-server): add reverse index for keyword/variable references

Optimize workspace-wide reference operations from O(D) to O(k) where
D = total documents and k = documents actually referencing the target.
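
To illustrate the approach, here is a minimal standalone sketch of the
diff-based reverse index. The ReverseRefIndex class and the plain string
document ids are illustrative only, not the actual implementation: the real
code in DocumentsCacheHelper uses TextDocument objects and weakref.WeakSet so
that closed documents drop out of the index automatically.

    from collections import defaultdict


    class ReverseRefIndex:
        def __init__(self) -> None:
            # (definition source, name) -> ids of documents that reference it
            self._users: dict[tuple[str, str], set[str]] = defaultdict(set)
            # document id -> keys it referenced at its last analysis
            self._doc_refs: dict[str, set[tuple[str, str]]] = {}

        def update(self, doc_id: str, refs: set[tuple[str, str]]) -> None:
            old = self._doc_refs.get(doc_id, set())
            for key in old - refs:  # references removed by an edit
                self._users[key].discard(doc_id)
            for key in refs - old:  # newly added references
                self._users[key].add(doc_id)
            self._doc_refs[doc_id] = refs

        def users(self, key: tuple[str, str]) -> set[str]:
            # direct lookup instead of scanning every workspace document
            return self._users.get(key, set())


    index = ReverseRefIndex()
    index.update("suite_a.robot", {("keywords.resource", "Login User")})
    index.update("suite_b.robot", {("keywords.resource", "Login User")})
    index.update("suite_a.robot", set())  # an edit removed the call again
    assert index.users(("keywords.resource", "Login User")) == {"suite_b.robot"}

Keying on (source, name) rather than on object identity keeps the index usable
when cached KeywordDoc/VariableDefinition instances are rebuilt after cache
invalidation; the set difference against the previously recorded keys is what
drops references that an edit removed.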
Changes: - Add reverse index data structures in DocumentsCacheHelper to track which documents reference each keyword/variable - Use stable (source, name) tuple keys resilient to cache invalidation - Implement diff-based updates to handle removed references after edits - Add get_keyword_ref_users() and get_variable_ref_users() for O(1) lookup - Update Find References to use reverse index with workspace scan fallback - Update unused keyword/variable detection to use reverse index --- .../robotframework/parts/diagnostics.py | 47 ++++++++-- .../robotframework/parts/references.py | 49 +++++++---- .../diagnostics/document_cache_helper.py | 86 ++++++++++++++++++- 3 files changed, 154 insertions(+), 28 deletions(-) diff --git a/packages/language_server/src/robotcode/language_server/robotframework/parts/diagnostics.py b/packages/language_server/src/robotcode/language_server/robotframework/parts/diagnostics.py index 1a06a90d..450a06c2 100644 --- a/packages/language_server/src/robotcode/language_server/robotframework/parts/diagnostics.py +++ b/packages/language_server/src/robotcode/language_server/robotframework/parts/diagnostics.py @@ -18,8 +18,9 @@ EnvironmentVariableDefinition, GlobalVariableDefinition, LibraryArgumentDefinition, + VariableDefinition, ) -from robotcode.robot.diagnostics.library_doc import LibraryDoc +from robotcode.robot.diagnostics.library_doc import KeywordDoc, LibraryDoc from robotcode.robot.diagnostics.namespace import Namespace from ...common.parts.diagnostics import DiagnosticsCollectType, DiagnosticsResult @@ -138,6 +139,40 @@ def collect_namespace_diagnostics( ], ) + def _is_keyword_used_anywhere( + self, + document: TextDocument, + kw: KeywordDoc, + namespace: Namespace, + ) -> bool: + """Check if keyword is used anywhere, using index with safe fallback.""" + if self.parent.documents_cache.get_keyword_ref_users(kw): + return True + + if namespace.get_keyword_references().get(kw): + return True + + # Safe fallback: workspace scan if index might be incomplete + refs = self.parent.robot_references.find_keyword_references(document, kw, False, True) + return bool(refs) + + def _is_variable_used_anywhere( + self, + document: TextDocument, + var: VariableDefinition, + namespace: Namespace, + ) -> bool: + """Check if variable is used anywhere, using index with safe fallback.""" + if self.parent.documents_cache.get_variable_ref_users(var): + return True + + if namespace.get_variable_references().get(var): + return True + + # Safe fallback: workspace scan if index might be incomplete + refs = self.parent.robot_references.find_variable_references(document, var, False, True) + return bool(refs) + @language_id("robotframework") @_logger.call def collect_unused_keyword_references( @@ -166,10 +201,7 @@ def _collect_unused_keyword_references( for kw in (namespace.get_library_doc()).keywords.values(): check_current_task_canceled() - references = self.parent.robot_references.find_keyword_references( - document, kw, False, True - ) - if not references: + if not self._is_keyword_used_anywhere(document, kw, namespace): result.append( Diagnostic( range=kw.name_range, @@ -255,10 +287,7 @@ def _collect_unused_variable_references( ): continue - references = self.parent.robot_references.find_variable_references( - document, var, False, True - ) - if not references: + if not self._is_variable_used_anywhere(document, var, namespace): result.append( Diagnostic( range=var.name_range, diff --git a/packages/language_server/src/robotcode/language_server/robotframework/parts/references.py 
b/packages/language_server/src/robotcode/language_server/robotframework/parts/references.py index 3486781d..b2352f94 100644 --- a/packages/language_server/src/robotcode/language_server/robotframework/parts/references.py +++ b/packages/language_server/src/robotcode/language_server/robotframework/parts/references.py @@ -224,7 +224,7 @@ def _find_variable_references( include_declaration: bool = True, stop_at_first: bool = False, ) -> List[Location]: - result = [] + result: List[Location] = [] if include_declaration and variable.source: result.append(Location(str(Uri.from_path(variable.source)), variable.name_range)) @@ -232,15 +232,22 @@ def _find_variable_references( if variable.type == VariableDefinitionType.LOCAL_VARIABLE: result.extend(self.find_variable_references_in_file(document, variable, False)) else: - result.extend( - self._find_references_in_workspace( - document, - stop_at_first, - self.find_variable_references_in_file, - variable, - False, + # Use reverse index for lookup instead of workspace scan + docs_to_search = self.parent.documents_cache.get_variable_ref_users(variable) + if docs_to_search: + for doc in docs_to_search: + check_current_task_canceled() + result.extend(self.find_variable_references_in_file(doc, variable, False)) + if result and stop_at_first: + break + else: + # Fallback to workspace scan if index is empty + result.extend( + self._find_references_in_workspace( + document, stop_at_first, self.find_variable_references_in_file, variable, False + ) ) - ) + return result @_logger.call @@ -317,20 +324,26 @@ def _find_keyword_references( include_declaration: bool = True, stop_at_first: bool = False, ) -> List[Location]: - result = [] + result: List[Location] = [] if include_declaration and kw_doc.source: result.append(Location(str(Uri.from_path(kw_doc.source)), kw_doc.range)) - result.extend( - self._find_references_in_workspace( - document, - stop_at_first, - self.find_keyword_references_in_file, - kw_doc, - False, + # Use reverse index for lookup instead of workspace scan + docs_to_search = self.parent.documents_cache.get_keyword_ref_users(kw_doc) + if docs_to_search: + for doc in docs_to_search: + check_current_task_canceled() + result.extend(self.find_keyword_references_in_file(doc, kw_doc, False)) + if result and stop_at_first: + break + else: + # Fallback to workspace scan if index is empty + result.extend( + self._find_references_in_workspace( + document, stop_at_first, self.find_keyword_references_in_file, kw_doc, False + ) ) - ) return result diff --git a/packages/robot/src/robotcode/robot/diagnostics/document_cache_helper.py b/packages/robot/src/robotcode/robot/diagnostics/document_cache_helper.py index 2ca76abb..278fbc80 100644 --- a/packages/robot/src/robotcode/robot/diagnostics/document_cache_helper.py +++ b/packages/robot/src/robotcode/robot/diagnostics/document_cache_helper.py @@ -30,8 +30,9 @@ from ..utils import get_robot_version from ..utils.stubs import Languages from .data_cache import CacheSection +from .entities import VariableDefinition from .imports_manager import ImportsManager -from .library_doc import LibraryDoc +from .library_doc import KeywordDoc, LibraryDoc from .namespace import DocumentType, Namespace, NamespaceCacheData, NamespaceMetaData from .workspace_config import ( AnalysisDiagnosticModifiersConfig, @@ -99,6 +100,18 @@ def __init__( self._variables_users_lock = threading.RLock() self._variables_users: dict[str, weakref.WeakSet[TextDocument]] = {} + # Reference tracking for O(1) lookup of keyword/variable usages + # Uses (source, 
name) tuples as keys for stability across cache invalidation + self._ref_tracking_lock = threading.RLock() + self._keyword_ref_users: dict[tuple[str, str], weakref.WeakSet[TextDocument]] = {} + self._variable_ref_users: dict[tuple[str, str], weakref.WeakSet[TextDocument]] = {} + self._doc_keyword_refs: weakref.WeakKeyDictionary[ + TextDocument, set[tuple[str, str]] + ] = weakref.WeakKeyDictionary() + self._doc_variable_refs: weakref.WeakKeyDictionary[ + TextDocument, set[tuple[str, str]] + ] = weakref.WeakKeyDictionary() + # Counter for periodic cleanup of stale dependency map entries self._track_count = 0 @@ -520,6 +533,8 @@ def __namespace_initialized(self, sender: Namespace) -> None: def __namespace_analysed(self, sender: Namespace) -> None: """Re-save namespace to cache after analysis to include diagnostics and analysis results.""" if sender.document is not None: + self._track_references(sender.document, sender) + imports_manager = self.get_imports_manager(sender.document) self._save_namespace_to_cache(sender, imports_manager) @@ -585,6 +600,22 @@ def get_variables_users(self, variables_doc: LibraryDoc) -> list[TextDocument]: return list(self._variables_users[var_key]) return [] + def get_keyword_ref_users(self, kw_doc: KeywordDoc) -> list[TextDocument]: + """Get documents that reference a keyword.""" + with self._ref_tracking_lock: + key = (kw_doc.source or "", kw_doc.name) + if key in self._keyword_ref_users: + return list(self._keyword_ref_users[key]) + return [] + + def get_variable_ref_users(self, var_def: VariableDefinition) -> list[TextDocument]: + """Get documents that reference a variable.""" + with self._ref_tracking_lock: + key = (var_def.source or "", var_def.name) + if key in self._variable_ref_users: + return list(self._variable_ref_users[key]) + return [] + def _cleanup_stale_dependency_maps(self) -> None: """Remove entries with empty WeakSets from dependency maps. @@ -606,6 +637,59 @@ def _cleanup_stale_dependency_maps(self) -> None: for var_key in stale_var_keys: del self._variables_users[var_key] + with self._ref_tracking_lock: + stale_kw_ref_keys = [k for k, v in self._keyword_ref_users.items() if len(v) == 0] + for kw_ref_key in stale_kw_ref_keys: + del self._keyword_ref_users[kw_ref_key] + + stale_var_ref_keys = [k for k, v in self._variable_ref_users.items() if len(v) == 0] + for var_ref_key in stale_var_ref_keys: + del self._variable_ref_users[var_ref_key] + + def _track_references(self, document: TextDocument, namespace: Namespace) -> None: + """Track keyword/variable references. + + Uses diff-based updates: compares current references against previous + to handle documents that stop referencing items after edits. 
+ """ + with self._ref_tracking_lock: + self._update_keyword_refs(document, namespace) + self._update_variable_refs(document, namespace) + + def _update_keyword_refs(self, document: TextDocument, namespace: Namespace) -> None: + """Update reverse index for keyword references.""" + keyword_refs = namespace.get_keyword_references() + new_keys = {(kw.source or "", kw.name) for kw in keyword_refs} + old_keys = self._doc_keyword_refs.get(document, set()) + + for key in old_keys - new_keys: + if key in self._keyword_ref_users: + self._keyword_ref_users[key].discard(document) + + for key in new_keys - old_keys: + if key not in self._keyword_ref_users: + self._keyword_ref_users[key] = weakref.WeakSet() + self._keyword_ref_users[key].add(document) + + self._doc_keyword_refs[document] = new_keys + + def _update_variable_refs(self, document: TextDocument, namespace: Namespace) -> None: + """Update reverse index for variable references.""" + variable_refs = namespace.get_variable_references() + new_keys = {(var.source or "", var.name) for var in variable_refs} + old_keys = self._doc_variable_refs.get(document, set()) + + for key in old_keys - new_keys: + if key in self._variable_ref_users: + self._variable_ref_users[key].discard(document) + + for key in new_keys - old_keys: + if key not in self._variable_ref_users: + self._variable_ref_users[key] = weakref.WeakSet() + self._variable_ref_users[key].add(document) + + self._doc_variable_refs[document] = new_keys + def get_initialized_namespace(self, document: TextDocument) -> Namespace: result: Namespace | None = document.get_data(self.INITIALIZED_NAMESPACE) if result is None: