From 0485bfdc0f6d71d939a030df06b3ac54141cf5ce Mon Sep 17 00:00:00 2001 From: NicolasLMP Date: Tue, 29 Jul 2025 17:23:05 +0200 Subject: [PATCH 1/7] feat : make the evaluation method and the CLI compatible with the new query reformulation --- rag_evaluation/core/isschat_client.py | 35 +++++++++++++++-- .../evaluators/generation_evaluator.py | 7 ++-- src/cli/commands/chat.py | 8 ++-- src/cli/commands/status.py | 26 ++++++------- src/webapp/app.py | 39 ++++++++++++++++++- 5 files changed, 88 insertions(+), 27 deletions(-) diff --git a/rag_evaluation/core/isschat_client.py b/rag_evaluation/core/isschat_client.py index 652b9eb..efef5b6 100644 --- a/rag_evaluation/core/isschat_client.py +++ b/rag_evaluation/core/isschat_client.py @@ -35,12 +35,15 @@ def __init__(self, conversation_memory: bool = False): def query(self, question: str, context: Optional[str] = None) -> Tuple[str, float, List[Dict[str, str]]]: start_time = time.time() try: - # Use provided context or build contextual question - contextual_question = question + # Format history for reformulation service if context provided + history = "" if context: - contextual_question = f"Contexte de conversation:\n{context}\n\nQuestion actuelle: {question}" + history = self._format_context_as_history(context) + elif self.conversation_memory and self.conversation_history: + history = self._format_conversation_history() - response, sources = self.rag_pipeline.process_query(contextual_question, verbose=False) + # Use new API signature with history parameter + response, sources = self.rag_pipeline.process_query(query=question, history=history, verbose=False) response_time = time.time() - start_time # Store in conversation history if memory is enabled @@ -69,6 +72,30 @@ def _parse_sources_string(self, sources_str: str) -> List[Dict[str, str]]: sources.append({"title": title, "url": url}) return sources + def _format_context_as_history(self, context: str) -> str: + """Format evaluation context as conversation history for reformulation service""" + if not context or not context.strip(): + return "" + + # If context already looks like formatted history, return as is + if "User:" in context and "Assistant:" in context: + return context + + # Otherwise, treat as single assistant message + return f"Assistant: {context.strip()}" + + def _format_conversation_history(self) -> str: + """Format stored conversation history for reformulation service""" + if not self.conversation_history: + return "" + + history_lines = [] + for exchange in self.conversation_history: + history_lines.append(f"User: {exchange['question']}") + history_lines.append(f"Assistant: {exchange['response']}") + + return "\n".join(history_lines) + def health_check(self) -> bool: """Check if Isschat is responding properly""" try: diff --git a/rag_evaluation/evaluators/generation_evaluator.py b/rag_evaluation/evaluators/generation_evaluator.py index 494586f..2b06cc9 100644 --- a/rag_evaluation/evaluators/generation_evaluator.py +++ b/rag_evaluation/evaluators/generation_evaluator.py @@ -86,7 +86,7 @@ def _store_conversation_turn(self, test_case: TestCase, response: str): self.conversation_history[conversation_id].append({"question": test_case.question, "response": response}) def _build_context_string(self, test_case: TestCase) -> Optional[str]: - """Build context string from conversation context for IsschatClient""" + """Build context string from conversation context for IsschatClient (reformulation-compatible format)""" if not test_case.conversation_context: return None @@ -114,8 
+114,9 @@ def _build_context_string(self, test_case: TestCase) -> Optional[str]: pass if question and response: - context_parts.append(f"Q: {question}") - context_parts.append(f"R: {response}") + # Use User:/Assistant: format compatible with reformulation service + context_parts.append(f"User: {question}") + context_parts.append(f"Assistant: {response}") return "\n".join(context_parts) if context_parts else None diff --git a/src/cli/commands/chat.py b/src/cli/commands/chat.py index 6b78182..68ed2ac 100644 --- a/src/cli/commands/chat.py +++ b/src/cli/commands/chat.py @@ -162,14 +162,14 @@ def _show_history(self): click.echo() def _build_history_context(self) -> str: - """Build history context for the query""" + """Build history context for the query (reformulation-compatible format)""" if len(self.history) <= 1: return "" context_parts = [] - for item in self.history[-2:]: - context_parts.append(f"Q: {item['question']}") - context_parts.append(f"R: {item['answer']}") + for item in self.history[-3:]: # Use last 3 exchanges for better context + context_parts.append(f"User: {item['question']}") + context_parts.append(f"Assistant: {item['answer']}") return "\n".join(context_parts) diff --git a/src/cli/commands/status.py b/src/cli/commands/status.py index 5d9fc3e..9a133e2 100644 --- a/src/cli/commands/status.py +++ b/src/cli/commands/status.py @@ -120,31 +120,29 @@ def _check_rag_status(verbose: bool) -> Dict[str, Any]: try: pipeline = SemanticRAGPipelineFactory.create_semantic_pipeline() - # Test du pipeline - test_results = pipeline.check_pipeline() - - # Detailed status + # Get pipeline status (more efficient than running test queries) status_info = pipeline.get_status() + pipeline_ready = pipeline.is_ready() checks = { - "Pipeline ready": test_results.get("success", False), - "Retrieval tool": status_info.get("retrieval_tool", {}).get("ready", False), + "Pipeline ready": pipeline_ready, + "Retrieval tool": status_info.get("semantic_retrieval_tool", {}).get("ready", False), "Generation tool": status_info.get("generation_tool", {}).get("ready", False), } - success = test_results.get("success", False) - details = {} if verbose: details = { - "pipeline_ready": status_info.get("ready", False), - "test_query": test_results.get("test_query", ""), - "response_time_ms": test_results.get("response_time_ms", 0), - "vector_db_count": status_info.get("retrieval_tool", {}).get("vector_db", {}).get("points_count", 0), - "error": test_results.get("error"), + "pipeline_ready": pipeline_ready, + "semantic_features_enabled": status_info.get("semantic_features_enabled", False), + "vector_db_count": status_info.get("semantic_retrieval_tool", {}) + .get("vector_db", {}) + .get("points_count", 0), + "reformulation_service": "available" if pipeline.reformulation_service.is_ready() else "not configured", + "capabilities": list(status_info.get("capabilities", {}).keys()), } - return {"success": success, "checks": checks, "details": details} + return {"success": pipeline_ready, "checks": checks, "details": details} except Exception as e: return {"success": False, "error": str(e), "checks": {"Pipeline RAG": False}} diff --git a/src/webapp/app.py b/src/webapp/app.py index 1fda614..73172ca 100644 --- a/src/webapp/app.py +++ b/src/webapp/app.py @@ -9,12 +9,41 @@ from datetime import datetime import uuid from typing import Optional +import random sys.path.append(str(Path(__file__).parent.parent.parent)) os.environ["TOKENIZERS_PARALLELISM"] = "false" + +def get_random_loading_message(): + """Get a random loading message to 
show variety""" + messages = [ + "Analyse en cours...", + "Recherche dans la documentation...", + "Traitement de votre requête...", + "Analyse de votre question...", + "Recherche d'informations pertinentes...", + "Consultation de la base de connaissances...", + "Formulation d'une réponse...", + "Recherche sur votre sujet...", + "Examen de la documentation...", + "Recherche de la meilleure réponse...", + "Exploration des ressources disponibles...", + "Analyse de votre demande...", + "Collecte d'informations...", + "Compilation des données...", + "Traitement de votre demande...", + "Préparation de la réponse...", + "Consultation des sources...", + "Réflexion en cours...", + "Génération de la réponse...", + "Finalisation de l'analyse...", + ] + return random.choice(messages) + + try: asyncio.get_running_loop() except RuntimeError: pass @@ -415,8 +444,11 @@ def format_chat_history(conversation_id: str): # Note: History is now used for query reformulation within the pipeline, not for generation chat_history = format_chat_history(st.session_state["current_conversation_id"]) + # Show loading message immediately with variety + loading_message = get_random_loading_message() + # Process the question with all features - with st.spinner("Analysis in progress..."): + with st.spinner(loading_message): result, sources = process_question_with_model( model, features, prompt, chat_history, st.session_state["current_conversation_id"], start_time ) @@ -478,12 +510,15 @@ def handle_prompt_click(prompt_text): ) st.chat_message("user", avatar=IMAGES["user"]).write(prompt) + # Show loading message immediately with variety + loading_message = get_random_loading_message() + # Prepare chat history for context from the data manager # Note: History is now used for query reformulation within the pipeline, not for generation chat_history = format_chat_history(st.session_state["current_conversation_id"]) # Process the question with all features - with st.spinner("Analysis in progress..."): + with st.spinner(loading_message): result, sources = process_question_with_model( model, features, prompt, chat_history, st.session_state["current_conversation_id"], start_time ) From ea694730875556171296d975695407c323251152 Mon Sep 17 00:00:00 2001 From: NicolasLMP Date: Tue, 29 Jul 2025 17:36:57 +0200 Subject: [PATCH 2/7] feat : history should not be checked for the first question --- src/webapp/app.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/webapp/app.py b/src/webapp/app.py index 73172ca..f080c63 100644 --- a/src/webapp/app.py +++ b/src/webapp/app.py @@ -13,9 +13,19 @@ sys.path.append(str(Path(__file__).parent.parent.parent)) - os.environ["TOKENIZERS_PARALLELISM"] = "false" +try: + asyncio.get_running_loop() +except RuntimeError: + pass + +from src.rag.semantic_pipeline import SemanticRAGPipelineFactory +from src.webapp.components.features_manager import FeaturesManager +from src.webapp.components.history_manager import get_history_manager +from src.webapp.auth.azure_auth import AzureADAuth +from src.webapp.example_prompts import EXAMPLE_PROMPTS + def get_random_loading_message(): """Get a random loading message to show variety""" @@ -44,17 +54,6 @@ def get_random_loading_message(): return random.choice(messages) -try: - asyncio.get_running_loop() -except RuntimeError: - pass - -from src.rag.semantic_pipeline import SemanticRAGPipelineFactory -from src.webapp.components.features_manager import FeaturesManager -from src.webapp.components.history_manager import 
get_history_manager -from src.webapp.auth.azure_auth import AzureADAuth -from src.webapp.example_prompts import EXAMPLE_PROMPTS - # images paths IMAGES = { "user": str(Path(__file__).parent.parent.parent / "Images" / "user.svg"), @@ -358,7 +357,10 @@ def chat_page(): # Helper to format chat history for prompt def format_chat_history(conversation_id: str): - from src.storage.data_manager import get_data_manager + # Quick check: if this is the first question in session, skip database query + if len(st.session_state.get("messages", [])) <= 1: + print("📚 HISTORY: First question - skipping database query") + return "" data_manager = get_data_manager() # Fetch entries for the current conversation_id (no artificial limit) @@ -513,12 +515,12 @@ def handle_prompt_click(prompt_text): # Show loading message immediately with variety loading_message = get_random_loading_message() - # Prepare chat history for context from the data manager - # Note: History is now used for query reformulation within the pipeline, not for generation - chat_history = format_chat_history(st.session_state["current_conversation_id"]) - - # Process the question with all features + # Process the question with all features - spinner covers everything with st.spinner(loading_message): + # Prepare chat history for context from the data manager + # Note: History is now used for query reformulation within the pipeline, not for generation + chat_history = format_chat_history(st.session_state["current_conversation_id"]) + result, sources = process_question_with_model( model, features, prompt, chat_history, st.session_state["current_conversation_id"], start_time ) From bcf7206458de4cb0aae16a2f30c9d21fc5f6b690 Mon Sep 17 00:00:00 2001 From: NicolasLMP Date: Tue, 29 Jul 2025 17:52:59 +0200 Subject: [PATCH 3/7] feat : update readme --- README.md | 108 +++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 41c9c98..7aca650 100644 --- a/README.md +++ b/README.md @@ -5,33 +5,48 @@ A chatbot that provides semantic search and conversational AI capabilities for Confluence knowledge bases using advanced RAG (Retrieval-Augmented Generation) technology with semantic understanding. 
-## Key Features +## Table of Contents + +- [Features](#features) +- [Installation](#installation) +- [Launch](#launch) + - [Web Interface](#web-interface) + - [Command Line Interface](#command-line-interface-cli) + - [Evaluation System](#evaluation-system) +- [Architecture](#architecture) +- [Production Deployment](#production-deployment) + - [Azure Cloud Deployment](#azure-cloud-deployment) + - [Docker Deployment](#docker-deployment) + - [Local Development](#local-development) + - [Testing](#testing) +- [License](#license) + +## Features ### Core RAG Capabilities -- **Semantic RAG Pipeline**: Advanced semantic understanding with query reformulation -- **Dual Pipeline Architecture**: Both standard and semantic-enhanced RAG processing +- **Semantic RAG Pipeline**: Advanced semantic understanding with query reformulation and coreference resolution +- **Dual Pipeline Architecture**: Both standard and semantic-enhanced RAG processing with intelligent fallback +- **Intent Classification**: Automatically detects and routes queries based on intent (team_info, project_info, technical_info, feature_info) +- **Query Reformulation**: LLM-based coreference resolution using conversation context for ambiguous queries - **Intelligent Query Processing**: Handles misleading keywords and provides contextually accurate responses -- **Vector Retrieval**: Direct vector search with optional semantic re-ranking -- **Semantic Re-ranking**: Re-ranks results based on semantic similarity and intent matching - -### Advanced Semantic Features -- **Intent Classification**: Automatically detects query intent (team_info, project_info, technical_info, feature_info) -- **Query Reformulation**: LLM-based coreference resolution and context clarification -- **Multilingual Support**: Optimized for French and English content with synonym handling +- **Vector Retrieval**: Direct vector search with semantic re-ranking based on similarity and intent matching +- **Multilingual Support**: Optimized for French and English content with cross-language understanding and synonym handling - **Contextual Understanding**: Maintains document hierarchy awareness for numerical and structured queries -### Enterprise Features +### Enterprise Operations - **Azure AD Authentication**: Secure OAuth 2.0 integration with domain validation -- **Performance Monitoring**: Real-time metrics tracking and analytics dashboard -- **Conversation History**: Persistent chat history with search capabilities -- **User Feedback System**: Integrated feedback collection and analysis -- **Admin Dashboard**: System management and configuration interface +- **Performance Monitoring**: Real-time response time, accuracy metrics, and system health tracking +- **Conversation History**: Persistent chat history with search capabilities and conversation context +- **User Feedback System**: Integrated feedback collection with sentiment analysis and analytics +- **Admin Dashboard**: System management with performance insights, user analytics, and configuration interface -### Evaluation & Quality Assurance +### Quality Assurance & Evaluation - **Comprehensive Evaluation Framework**: Multi-category testing (retrieval, generation, business value, robustness) -- **LLM-based Automated Evaluation**: Automated scoring with confidence metrics -- **CI/CD Integration**: Automated testing with configurable quality thresholds -- **Performance Metrics**: Response time, accuracy, and user satisfaction tracking +- **LLM-based Automated Evaluation**: Automated quality assessment using 
advanced language models with confidence metrics +- **CI/CD Integration**: Automated testing pipeline with configurable quality thresholds +- **Performance Benchmarking**: Continuous performance monitoring and improvement tracking +- **Adaptive Chunking**: Content-type aware document processing with hierarchical chunking +- **Flexible Vector Storage**: Weaviate cloud integration with automated data pipeline and batch optimization ## Installation @@ -119,22 +134,22 @@ Isschat provides a powerful CLI tool for managing and querying your knowledge ba - **Status Check**: Check system components and configuration ```bash - uv run python -m src.cli.main status [--verbose] [--component config|ingestion|rag|all] + uv run -m src.cli.main status [--verbose] [--component config|ingestion|rag|all] ``` - **Data Ingestion**: Build or update the vector database from Confluence ```bash - uv run python -m src.cli.main ingest [--source confluence] [--force-rebuild] [--verbose] + uv run -m src.cli.main ingest [--source confluence] [--force-rebuild] [--verbose] ``` - **Interactive Chat**: Start a chat session without the web interface ```bash - uv run python -m src.cli.main chat [--user-id cli_user] + uv run -m src.cli.main chat [--user-id cli_user] ``` - **Direct Query**: Query the vector database with detailed results ```bash - uv run python -m src.cli.main query -q "your question" [options] + uv run -m src.cli.main query -q "your question" [options] ``` #### Query Command Options @@ -152,19 +167,19 @@ Isschat provides a powerful CLI tool for managing and querying your knowledge ba ```bash # Check system status and configuration -uv run python -m src.cli.main status --verbose +uv run -m src.cli.main status --verbose # Ingest data from Confluence -uv run python -m src.cli.main ingest --source confluence --verbose +uv run -m src.cli.main ingest --source confluence --verbose # Start interactive chat session -uv run python -m src.cli.main chat +uv run -m src.cli.main chat # Query with detailed information -uv run python -m src.cli.main query -q "How to configure authentication?" -k 3 --show-metadata --show-stats +uv run -m src.cli.main query -q "How to configure authentication?" 
-k 3 --show-metadata --show-stats # Query without LLM generation (retrieval only) -uv run python -m src.cli.main query -q "project management" --no-llm --show-stats +uv run -m src.cli.main query -q "project management" --no-llm --show-stats ``` ### Evaluation System @@ -172,14 +187,16 @@ uv run python -m src.cli.main query -q "project management" --no-llm --show-stat Run comprehensive RAG evaluation: ```bash +# View evaluation dashboard +uv run rag_evaluation/evaluation_dashboard.py + # Run all evaluation categories -uv run python rag_evaluation/run_evaluation.py +uv run rag_evaluation/run_evaluation.py # Run specific evaluation category -uv run python rag_evaluation/run_evaluation.py --category retrieval +uv run rag_evaluation/run_evaluation.py --category retrieval + -# View evaluation dashboard -uv run python rag_evaluation/evaluation_dashboard.py ``` @@ -250,33 +267,6 @@ Isschat/ - **Enterprise Security**: Azure AD integration with domain validation - **CLI and Web Interfaces**: Both command-line and web-based interactions -## Advanced Features - -### Semantic Intelligence -- **Intent Classification**: Automatically detects and routes queries based on intent (team_info, project_info, technical_info, feature_info) -- **Query Reformulation**: LLM-based coreference resolution using conversation context -- **Context-Aware Retrieval**: Maintains document hierarchy awareness for complex queries -- **Multilingual Processing**: Optimized for French and English content with cross-language understanding - -### Enterprise Operations -- **Conversation Analytics**: Advanced user interaction tracking and analysis -- **Performance Monitoring**: Real-time response time, accuracy metrics, and system health -- **Feedback Loop**: Integrated user feedback collection with sentiment analysis -- **Query History**: Persistent search history with conversation context -- **Admin Dashboard**: System management with performance insights and user analytics - -### Quality Assurance -- **Comprehensive RAG Evaluation**: Multi-category testing framework for retrieval, generation, business value, and robustness -- **LLM-based Evaluation**: Automated quality assessment using advanced language models -- **CI/CD Integration**: Automated testing pipeline with configurable quality thresholds -- **Performance Benchmarking**: Continuous performance monitoring and improvement tracking - -### Technical Capabilities -- **Flexible Vector Storage**: Weaviate cloud -- **Adaptive Chunking**: Content-type aware document processing with hierarchical chunking -- **Semantic Re-ranking**: Advanced result re-ranking based on semantic similarity and intent matching -- **Automated Data Pipeline**: Streamlined document processing and embedding generation with batch optimization - ## Production Deployment ### Azure Cloud Deployment From 9943522abbcc595bf1e9f57c056c38c1a6fba3ec Mon Sep 17 00:00:00 2001 From: NicolasLMP Date: Tue, 29 Jul 2025 17:58:24 +0200 Subject: [PATCH 4/7] fix : delete deprecated test --- src/rag/tools/semantic_retrieval_tool.py | 43 ------------------------ 1 file changed, 43 deletions(-) diff --git a/src/rag/tools/semantic_retrieval_tool.py b/src/rag/tools/semantic_retrieval_tool.py index 665bb06..37b322e 100644 --- a/src/rag/tools/semantic_retrieval_tool.py +++ b/src/rag/tools/semantic_retrieval_tool.py @@ -212,46 +212,3 @@ def get_stats(self) -> Dict[str, Any]: except Exception as e: return {"type": "vector_retrieval_tool", "ready": False, "error": str(e)} - - def test_vector_retrieval(self, test_query: str = "qui sont 
les collaborateurs sur Isschat") -> Dict[str, Any]: - """Test vector retrieval with optional semantic re-ranking""" - try: - self._initialize() - - if not self.is_ready(): - return {"success": False, "error": "Vector DB empty or not accessible"} - - # Test with semantic re-ranking enabled - reranked_results = self.retrieve(test_query, k=5, use_semantic_reranking=True) - - # Test with semantic re-ranking disabled (direct vector retrieval) - direct_results = self.retrieve(test_query, k=5, use_semantic_reranking=False) - - return { - "success": True, - "query": test_query, - "reranked_results": { - "count": len(reranked_results), - "scores": [r.score for r in reranked_results], - "sample_content": reranked_results[0].content[:200] + "..." if reranked_results else None, - }, - "direct_results": { - "count": len(direct_results), - "scores": [r.score for r in direct_results], - "sample_content": direct_results[0].content[:200] + "..." if direct_results else None, - }, - "improvement": { - "score_improvement": ( - (reranked_results[0].score - direct_results[0].score) - if reranked_results and direct_results - else 0 - ), - "reranking_helps": ( - len(reranked_results) > 0 - and (not direct_results or reranked_results[0].score > direct_results[0].score) - ), - }, - } - - except Exception as e: - return {"success": False, "error": str(e)} From c4ca927577ba872cbede01bcd33c11f45fb24989 Mon Sep 17 00:00:00 2001 From: NicolasLMP Date: Wed, 30 Jul 2025 16:41:09 +0200 Subject: [PATCH 5/7] fix : language of the spinner's text --- src/webapp/app.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/webapp/app.py b/src/webapp/app.py index f080c63..a4e6cde 100644 --- a/src/webapp/app.py +++ b/src/webapp/app.py @@ -30,26 +30,26 @@ def get_random_loading_message(): """Get a random loading message to show variety""" messages = [ - "Analyse en cours...", - "Recherche dans la documentation...", - "Traitement de votre requête...", - "Analyse de votre question...", - "Recherche d'informations pertinentes...", - "Consultation de la base de connaissances...", - "Formulation d'une réponse...", - "Recherche sur votre sujet...", - "Examen de la documentation...", - "Recherche de la meilleure réponse...", - "Exploration des ressources disponibles...", - "Analyse de votre demande...", - "Collecte d'informations...", - "Compilation des données...", - "Traitement de votre demande...", - "Préparation de la réponse...", - "Consultation des sources...", - "Réflexion en cours...", - "Génération de la réponse...", - "Finalisation de l'analyse...", + "Analysis in progress...", + "Searching documentation...", + "Processing your request...", + "Analyzing your question...", + "Searching for relevant information...", + "Consulting knowledge base...", + "Formulating response...", + "Researching your topic...", + "Examining documentation...", + "Finding the best answer...", + "Exploring available resources...", + "Analyzing your request...", + "Collecting information...", + "Compiling data...", + "Processing your request...", + "Preparing response...", + "Consulting sources...", + "Thinking in progress...", + "Generating response...", + "Finalizing analysis...", ] return random.choice(messages) From 74c16bc211d07f8b7affd668cad85a7ad093174e Mon Sep 17 00:00:00 2001 From: NicolasLMP Date: Wed, 30 Jul 2025 16:50:01 +0200 Subject: [PATCH 6/7] fix : data manager imports --- src/webapp/app.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/webapp/app.py 
b/src/webapp/app.py index a4e6cde..c8ca790 100644 --- a/src/webapp/app.py +++ b/src/webapp/app.py @@ -23,6 +23,7 @@ from src.rag.semantic_pipeline import SemanticRAGPipelineFactory from src.webapp.components.features_manager import FeaturesManager from src.webapp.components.history_manager import get_history_manager +from src.storage.data_manager import get_data_manager from src.webapp.auth.azure_auth import AzureADAuth from src.webapp.example_prompts import EXAMPLE_PROMPTS @@ -414,7 +415,6 @@ def format_chat_history(conversation_id: str): "reuse_conversation_id", str(uuid.uuid4()) ) st.session_state["messages"] = [] - from src.storage.data_manager import get_data_manager data_manager = get_data_manager() existing_messages = data_manager.get_conversation_history( @@ -613,8 +613,6 @@ def history_page(): def get_real_performance_data(): """Get real performance data from data manager""" try: - from src.storage.data_manager import get_data_manager - data_manager = get_data_manager() # Get recent performance data @@ -649,7 +647,6 @@ def dashboard_page(): try: # Use the new PerformanceDashboard component - from src.storage.data_manager import get_data_manager from src.webapp.components.performance_dashboard import render_performance_dashboard data_manager = get_data_manager() From 43cd21c39bc2b190ca8287d2207dffc25f1474e4 Mon Sep 17 00:00:00 2001 From: NicolasLMP Date: Wed, 30 Jul 2025 18:11:23 +0200 Subject: [PATCH 7/7] fix : delete history from the prompt and update readme --- README.md | 116 ++++++++++++++---------------- rag_evaluation/core/llm_judge.py | 9 ++- src/cli/commands/query.py | 1 - src/rag/pipeline.py | 2 +- src/rag/semantic_pipeline.py | 4 +- src/rag/tools/generation_tool.py | 15 ++-- src/rag/tools/prompt_templates.py | 6 -- 7 files changed, 65 insertions(+), 88 deletions(-) diff --git a/README.md b/README.md index 7aca650..19fd1b9 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,13 @@ -# Isschat - Enterprise RAG Chatbot +# Isschat - Enterprise RAG chatbot [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/Isskar/Isschat/blob/main/LICENSE) [![Python 3.12+](https://img.shields.io/badge/Python-3.12+-blue.svg)](https://www.python.org/downloads/) +[![CI](https://github.com/Isskar/Isschat/actions/workflows/ci.yml/badge.svg)](https://github.com/Isskar/Isschat/actions/workflows/ci.yml) +[![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-black.svg)](https://github.com/astral-sh/ruff) -A chatbot that provides semantic search and conversational AI capabilities for Confluence knowledge bases using advanced RAG (Retrieval-Augmented Generation) technology with semantic understanding. +A chatbot that provides semantic search and conversational AI capabilities for Confluence knowledge bases using advanced RAG (Retrieval-Augmented Generation) technology. 
-## Table of Contents +## Table of contents - [Features](#features) - [Installation](#installation) @@ -23,30 +25,37 @@ A chatbot that provides semantic search and conversational AI capabilities for C ## Features -### Core RAG Capabilities -- **Semantic RAG Pipeline**: Advanced semantic understanding with query reformulation and coreference resolution -- **Dual Pipeline Architecture**: Both standard and semantic-enhanced RAG processing with intelligent fallback -- **Intent Classification**: Automatically detects and routes queries based on intent (team_info, project_info, technical_info, feature_info) -- **Query Reformulation**: LLM-based coreference resolution using conversation context for ambiguous queries -- **Intelligent Query Processing**: Handles misleading keywords and provides contextually accurate responses -- **Vector Retrieval**: Direct vector search with semantic re-ranking based on similarity and intent matching -- **Multilingual Support**: Optimized for French and English content with cross-language understanding and synonym handling -- **Contextual Understanding**: Maintains document hierarchy awareness for numerical and structured queries - -### Enterprise Operations -- **Azure AD Authentication**: Secure OAuth 2.0 integration with domain validation -- **Performance Monitoring**: Real-time response time, accuracy metrics, and system health tracking -- **Conversation History**: Persistent chat history with search capabilities and conversation context -- **User Feedback System**: Integrated feedback collection with sentiment analysis and analytics -- **Admin Dashboard**: System management with performance insights, user analytics, and configuration interface - -### Quality Assurance & Evaluation -- **Comprehensive Evaluation Framework**: Multi-category testing (retrieval, generation, business value, robustness) -- **LLM-based Automated Evaluation**: Automated quality assessment using advanced language models with confidence metrics -- **CI/CD Integration**: Automated testing pipeline with configurable quality thresholds -- **Performance Benchmarking**: Continuous performance monitoring and improvement tracking -- **Adaptive Chunking**: Content-type aware document processing with hierarchical chunking -- **Flexible Vector Storage**: Weaviate cloud integration with automated data pipeline and batch optimization +### Core RAG capabilities +- **Document ingestion**: Automated Confluence space crawling and content extraction +- **Hierarchical chunking**: Structure-preserving document segmentation for optimal context retrieval +- **Weaviate integration**: Fast vector search with cosine similarity and HNSW indexing +- **Semantic query processing**: Query reformulation and coreference resolution using LLM +- **Flexible data handling**: Adaptive chunking strategies and support for multiple data sources + +### Chatbot intelligence +- **Persistent history**: Session-aware conversations with memory across interactions +- **Multi-turn dialogue**: Natural conversation flow with context preservation +- **Multilingual support**: Optimized for French and English enterprise use cases +- **Response generation**: Coherent answers synthesized from retrieved knowledge + +### User interfaces +- **Streamlit web app**: Interactive chat interface +- **Streamlit Evaluation dashboard**: Multi-category testing (retrieval, generation, business value, robustness) +- **Command line interface (CLI)**: Complete system management and querying capabilities + +### Enterprise features +- **Azure AD 
authentication**: Secure access with enterprise domain validation +- **Cloud storage integration**: Azure Blob Storage support for scalable deployments +- **Secret management**: Azure Key Vault integration for secure credential handling +- **Environment support**: Configurable settings for development, staging, and production +- **Feedback integration**: User input collection for continuous model improvement + +### Monitoring & analytics +- **Evaluation dashboard**: Multi-category testing (retrieval, generation, business value, robustness) +- **Performance dashboard**: Real-time system metrics and usage analytics +- **Admin dashboard** *(in development)*: Backend management and monitoring tools +- **CI/CD support**: Integrated testing pipelines and automated deployment workflows +- **Comprehensive logging**: Detailed system activity tracking and debugging support ## Installation @@ -106,7 +115,7 @@ A chatbot that provides semantic search and conversational AI capabilities for C ## Launch -### Web Interface +### Web interface 1. **Install dependencies** ```bash @@ -118,41 +127,35 @@ A chatbot that provides semantic search and conversational AI capabilities for C uv run streamlit run src/webapp/app.py ``` -3. **Reconstruct the database** +3. **Ask your question to Isschat** - Click on the button "Rebuild from Confluence" +### Command line interface (CLI) -4. **Launch the chatbot** +Isschat provides a CLI tool for managing and querying your knowledge base: - Ask your question to the chatbot +#### Available commands -### Command Line Interface (CLI) - -Isschat provides a powerful CLI tool for managing and querying your knowledge base: - -#### Available Commands - -- **Status Check**: Check system components and configuration +- **Status check**: Check system components and configuration ```bash uv run -m src.cli.main status [--verbose] [--component config|ingestion|rag|all] ``` -- **Data Ingestion**: Build or update the vector database from Confluence +- **Data ingestion**: Build or update the vector database from Confluence ```bash uv run -m src.cli.main ingest [--source confluence] [--force-rebuild] [--verbose] ``` -- **Interactive Chat**: Start a chat session without the web interface +- **Interactive chat**: Start a chat session without the web interface ```bash uv run -m src.cli.main chat [--user-id cli_user] ``` -- **Direct Query**: Query the vector database with detailed results +- **Direct query**: Query the vector database with detailed results ```bash uv run -m src.cli.main query -q "your question" [options] ``` -#### Query Command Options +#### Query command options - `-q, --query`: Your search query (required) - `-k, --top-k`: Number of chunks to retrieve (default: 5) @@ -163,7 +166,7 @@ Isschat provides a powerful CLI tool for managing and querying your knowledge ba - `--show-stats`: Display statistics about sources and scores - `--no-llm`: Skip LLM generation and only show retrieved chunks -#### Example Usage +#### Example usage ```bash # Check system status and configuration @@ -182,13 +185,13 @@ uv run -m src.cli.main query -q "How to configure authentication?" 
-k 3 --show-m uv run -m src.cli.main query -q "project management" --no-llm --show-stats ``` -### Evaluation System +### Evaluation system Run comprehensive RAG evaluation: ```bash # View evaluation dashboard -uv run rag_evaluation/evaluation_dashboard.py +uv run streamlit run rag_evaluation/evaluation_dashboard.py # Run all evaluation categories uv run rag_evaluation/run_evaluation.py @@ -256,20 +259,9 @@ Isschat/ └── README.md # This documentation ``` -### Key Architectural Components - -- **Modular Design**: Clear separation of concerns with pluggable components -- **Factory Patterns**: Flexible component selection (storage, vector DB, etc.) -- **Abstract Interfaces**: Clean abstractions for easy extension and testing -- **Dual Storage Support**: Local files or Azure Blob Storage -- **Multiple Vector Databases**: Weaviate cloud -- **Comprehensive Evaluation**: Built-in testing framework with multiple evaluators -- **Enterprise Security**: Azure AD integration with domain validation -- **CLI and Web Interfaces**: Both command-line and web-based interactions - -## Production Deployment +## Production deployment -### Azure Cloud Deployment +### Azure cloud deployment For production deployment with Azure integration: @@ -281,7 +273,6 @@ AZURE_BLOB_CONTAINER_NAME=your_container_name # Azure Key Vault for Secret Management KEY_VAULT_URL=https://your-keyvault.vault.azure.net/ -ENVIRONMENT=production # Azure AD Authentication (for web app) AZURE_CLIENT_ID=your_azure_app_client_id @@ -289,7 +280,7 @@ AZURE_CLIENT_SECRET=your_azure_app_client_secret AZURE_TENANT_ID=your_azure_tenant_id ``` -### Docker Deployment +### Docker deployment Build and run with Docker: @@ -313,13 +304,12 @@ docker run -d \ isschat ``` -### Local Development +### Local development For local development, leave Azure settings disabled: ```bash USE_AZURE_STORAGE=false -ENVIRONMENT=development ``` ### Testing diff --git a/rag_evaluation/core/llm_judge.py b/rag_evaluation/core/llm_judge.py index 7975113..96b71b4 100644 --- a/rag_evaluation/core/llm_judge.py +++ b/rag_evaluation/core/llm_judge.py @@ -16,14 +16,17 @@ def __init__(self, config: Any): """Initialize LLM judge with configuration""" self.config = config - # Get API key from config + # Get API key and base URL from main Isschat config try: isschat_config = get_config() api_key = convert_to_secret_str(isschat_config.openrouter_api_key) + base_url = isschat_config.openrouter_base_url if not api_key: raise ValueError("OPENROUTER_API_KEY not found in configuration") + if not base_url: + raise ValueError("OPENROUTER_BASE_URL not found in configuration") except Exception as e: - raise ValueError(f"Failed to get API key: {e}") + raise ValueError(f"Failed to get API configuration: {e}") # Configure logging to suppress httpx INFO logs import logging @@ -36,7 +39,7 @@ def __init__(self, config: Any): temperature=config.judge_temperature, max_tokens=config.judge_max_tokens, openai_api_key=api_key, - openai_api_base=config.openrouter_base_url, + openai_api_base=base_url, ) def evaluate_conversational(self, question: str, response: str, expected: str, context: str = "") -> Dict[str, Any]: diff --git a/src/cli/commands/query.py b/src/cli/commands/query.py index 9f7e389..04af970 100644 --- a/src/cli/commands/query.py +++ b/src/cli/commands/query.py @@ -79,7 +79,6 @@ def query( llm_response_dict = generation_tool.generate( query=query, documents=results, - history="", ) llm_response = llm_response_dict.get("answer", "") generation_time = (time.time() - generation_start) * 1000 diff 
--git a/src/rag/pipeline.py b/src/rag/pipeline.py index e4c9d75..ea17a6c 100644 --- a/src/rag/pipeline.py +++ b/src/rag/pipeline.py @@ -56,7 +56,7 @@ def process_query( if verbose: self.logger.info("🤖 Step 2: Generating response") - generation_result = self.generation_tool.generate(query=query, documents=search_results, history=history) + generation_result = self.generation_tool.generate(query=query, documents=search_results) answer = generation_result["answer"] sources = generation_result["sources"] diff --git a/src/rag/semantic_pipeline.py b/src/rag/semantic_pipeline.py index dc03543..111c054 100644 --- a/src/rag/semantic_pipeline.py +++ b/src/rag/semantic_pipeline.py @@ -127,9 +127,7 @@ def process_query( self.logger.info("🤖 Step 3: Generating response") # Use reformulated query for generation to ensure consistent filtering - generation_result = self.generation_tool.generate( - query=reformulated_query, documents=search_results, history="" - ) + generation_result = self.generation_tool.generate(query=reformulated_query, documents=search_results) answer = generation_result["answer"] sources = generation_result["sources"] diff --git a/src/rag/tools/generation_tool.py b/src/rag/tools/generation_tool.py index 9df73a4..ec23f66 100644 --- a/src/rag/tools/generation_tool.py +++ b/src/rag/tools/generation_tool.py @@ -16,16 +16,13 @@ def __init__(self): if not self.config.openrouter_api_key: raise ValueError("OPENROUTER_API_KEY required for generation") - def generate( - self, query: str, documents: List[RetrievalDocument], history: str = "", numerical_context: Any = None - ) -> Dict[str, Any]: + def generate(self, query: str, documents: List[RetrievalDocument], numerical_context: Any = None) -> Dict[str, Any]: """ Generate response from query and retrieved documents Args: query: User query documents: Retrieved documents with scores - history: Conversation history numerical_context: Optional numerical query processing result Returns: @@ -41,7 +38,7 @@ def generate( context = self._prepare_context(relevant_documents) avg_score = sum(doc.score for doc in documents) / len(documents) if documents else 0.0 - prompt = self._build_prompt(query, context, history, avg_score, numerical_context) + prompt = self._build_prompt(query, context, avg_score, numerical_context) llm_response = self._call_openrouter(prompt) @@ -77,18 +74,14 @@ def _prepare_context(self, documents: List[RetrievalDocument]) -> str: context_parts = [doc.to_context_section(max_content_per_doc) for doc in documents] return "\n\n".join(context_parts) - def _build_prompt( - self, query: str, context: str, history: str = "", avg_score: float = 0.0, numerical_context: Any = None - ) -> str: + def _build_prompt(self, query: str, context: str, avg_score: float = 0.0, numerical_context: Any = None) -> str: """Build prompt based on context quality""" - history_section = f"{history}\n" if history.strip() else "" - # Add numerical context if available if numerical_context: numerical_info = self._format_numerical_context(numerical_context) context = f"{context}\n\n{numerical_info}" - return PromptTemplates.get_default_template().format(context=context, history=history_section, query=query) + return PromptTemplates.get_default_template().format(context=context, query=query) def _format_numerical_context(self, numerical_context: Any) -> str: """Format numerical context for inclusion in prompt""" diff --git a/src/rag/tools/prompt_templates.py b/src/rag/tools/prompt_templates.py index 2cfe94c..b2b6a98 100644 --- a/src/rag/tools/prompt_templates.py +++ 
b/src/rag/tools/prompt_templates.py @@ -33,12 +33,6 @@ def get_default_template() -> str: ----- {context} ----- -=== CONTEXTE DE LA CONVERSATION === -Historique des échanges précédents dans cette conversation : -(Utilise cet historique pour maintenir la cohérence et éviter les répétitions) ------ -{history} ------ === STYLE ET TON === - Professionnel mais accessible, comme un collègue expérimenté