diff --git a/.jules/bolt.md b/.jules/bolt.md index 1389df8..bc29da8 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -53,3 +53,7 @@ ## 2026-02-11 - Multi-Metric Aggregate Queries **Learning:** Executing multiple separate `count()` queries to gather system statistics results in multiple database round-trips and redundant table scans. **Action:** Use a single SQLAlchemy query with `func.count()` and `func.sum(case(...))` to calculate all metrics in one go. This reduces network overhead and allows the database to perform calculations in a single pass. + +## 2026-02-12 - Regex Caching and I/O Throttling in Priority Engine +**Learning:** Repeatedly calling `re.search` on string patterns and unthrottled `os.path.getmtime` calls in hot paths (like every `analyze` request) introduces measurable latency. Pre-compiling regexes and implementing a time-based (e.g., 5s) throttle for configuration reloads significantly improves throughput. +**Action:** Use `re.compile()` for static patterns and implement time-based throttling for any file-based configuration hot-reloading to avoid filesystem bottlenecks. Ensure the consumer (e.g., `PriorityEngine`) always triggers the reload check before checking cache freshness. diff --git a/backend/adaptive_weights.py b/backend/adaptive_weights.py index 944618b..9b58cc2 100644 --- a/backend/adaptive_weights.py +++ b/backend/adaptive_weights.py @@ -12,6 +12,7 @@ class AdaptiveWeights: _instance = None _weights = None _last_loaded = 0 + _last_check_time = 0 def __new__(cls): if cls._instance is None: @@ -40,8 +41,11 @@ def _load_weights(self): self._weights = {} def _check_reload(self): - # Optimization: Checking mtime is fast (stat call). - self._load_weights() + # Optimization: 5-second throttle to prevent excessive I/O in hot paths. 
def _get_compiled_patterns(self):
    """
    Return the urgency patterns as ``(compiled_regex, weight)`` pairs.

    The compiled list is cached on the instance. It is rebuilt when the
    cache is empty or when ``adaptive_weights._last_loaded`` differs from
    the value recorded at the previous build.

    Returns:
        list: ``(re.Pattern, weight)`` tuples in the order supplied by
        ``adaptive_weights.get_urgency_patterns()``.
    """
    # Let AdaptiveWeights decide whether to reload before freshness is
    # compared; that call applies its own time-based throttle.
    adaptive_weights._check_reload()

    current_load_time = adaptive_weights._last_loaded
    # Compare with `!=` rather than `>`: any change of the load marker means
    # the weights were reloaded, including a marker that moved backwards.
    # NOTE(review): `_last_loaded` is presumably a file mtime or load
    # timestamp -- confirm against AdaptiveWeights._load_weights.
    if not self._regex_cache or current_load_time != self._last_loaded_time:
        # Compile without extra flags so matching stays identical to the
        # `re.search(pattern, text)` call this cache replaces; adding
        # re.IGNORECASE here would silently widen what counts as a match.
        self._regex_cache = [
            (re.compile(pattern), weight)
            for pattern, weight in adaptive_weights.get_urgency_patterns()
        ]
        self._last_loaded_time = current_load_time
    return self._regex_cache
100 urgency = min(100, urgency) diff --git a/backend/requirements-render.txt b/backend/requirements-render.txt index f411143..774ad5a 100644 --- a/backend/requirements-render.txt +++ b/backend/requirements-render.txt @@ -15,9 +15,12 @@ a2wsgi python-jose[cryptography] passlib[bcrypt] bcrypt<4.0.0 +python-dotenv SpeechRecognition pydub googletrans==4.0.2 langdetect +async_lru +indic-nlp-library numpy scikit-learn diff --git a/backend_output.txt b/backend_output.txt index d598797..0b98da5 100644 --- a/backend_output.txt +++ b/backend_output.txt @@ -1,6 +1,13 @@ -INFO: Will watch for changes in these directories: ['E:\\projects\\VishwaGuru\\VishwaGuru'] -INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit) -INFO: Started reloader process [42100] using StatReload -2026-03-09 02:00:56,311 - backend.adaptive_weights - INFO - Adaptive weights loaded/reloaded. -WARNING: StatReload detected changes in 'backend\main.py'. Reloading... - \ No newline at end of file +2026-03-09 15:06:36,764 - backend.adaptive_weights - INFO - Adaptive weights loaded/reloaded. +2026-03-09 15:06:36,834 - backend.rag_service - INFO - Loaded 5 civic policies for RAG. +/home/jules/.pyenv/versions/3.12.12/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work + warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning) +2026-03-09 15:06:37,177 - backend.main - WARNING - FRONTEND_URL not set. Defaulting to http://localhost:5173 for development. +INFO: Started server process [9775] +INFO: Waiting for application startup. +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:10000 (Press CTRL+C to quit) +INFO: Shutting down +INFO: Waiting for application shutdown. +INFO: Application shutdown complete. 
"""Micro-benchmark for ``priority_engine.analyze``."""
import time
import sys
import os
import re

# Add the project root (the directory holding this script) to sys.path so the
# `backend` package resolves no matter which directory the script is run from.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from backend.priority_engine import priority_engine
from backend.adaptive_weights import adaptive_weights


def benchmark(iterations=1000):
    """
    Time repeated ``priority_engine.analyze`` calls on a fixed sample text.

    Args:
        iterations: Number of timed calls; must be at least 1.

    Returns:
        float: Total elapsed seconds for the timed loop.

    Raises:
        ValueError: If ``iterations`` is less than 1 (the per-call average
            would otherwise divide by zero).
    """
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    text = "Immediate help needed! There is a fire near the hospital and a child is trapped."

    # Warm up so one-time work on the first call is not counted in the timing.
    priority_engine.analyze(text)

    # perf_counter is the highest-resolution clock for measuring short
    # durations and is not affected by system clock adjustments.
    start = time.perf_counter()
    for _ in range(iterations):
        priority_engine.analyze(text)
    elapsed = time.perf_counter() - start

    print(f"Iterations: {iterations}")
    print(f"Total time: {elapsed:.4f} seconds")
    print(f"Average time per analysis: {elapsed / iterations * 1000:.4f} ms")
    return elapsed


if __name__ == "__main__":
    print("Running benchmark...")
    benchmark()