def get_git_info(self) -> Optional[Dict[str, str]]:
    """Collect metadata about the current git checkout.

    Git is invoked from the directory three levels above this file.
    Collection is best-effort: any failure to run git yields ``None``
    rather than an exception, so an evaluation run is never aborted
    just because version metadata is unavailable.

    Returns:
        ``None`` when ``self.include_git_info`` is false, when the git
        executable cannot be launched, or when any git command exits
        with a non-zero status (for example outside a repository).
        Otherwise a dict with the keys ``branch``, ``commit_hash``,
        ``commit_hash_short`` (first 8 characters of the hash),
        ``commit_message``, ``commit_author`` and ``commit_date``.
    """
    if not self.include_git_info:
        return None

    # Computed once instead of once per git call.
    # NOTE(review): presumably this resolves to the repository root --
    # confirm against the package layout.
    repo_dir = Path(__file__).parent.parent.parent

    def run_git(*args: str) -> str:
        """Run one git command in ``repo_dir`` and return its stripped stdout."""
        raw = subprocess.check_output(
            ["git", *args],
            cwd=repo_dir,
            stderr=subprocess.DEVNULL,
        )
        # errors="replace": commit metadata is not guaranteed to be valid
        # UTF-8; a strict decode would raise UnicodeDecodeError, which the
        # handler below does not (and should not) treat as "git missing".
        return raw.decode("utf-8", errors="replace").strip()

    try:
        branch = run_git("rev-parse", "--abbrev-ref", "HEAD")
        commit_hash = run_git("rev-parse", "HEAD")
        commit_message = run_git("log", "-1", "--pretty=format:%s")
        commit_author = run_git("log", "-1", "--pretty=format:%an")
        commit_date = run_git("log", "-1", "--pretty=format:%ci")
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: git ran but failed (e.g. not a repository).
        # OSError: git could not be started at all; this covers
        # FileNotFoundError as well as PermissionError/NotADirectoryError.
        return None

    return {
        "branch": branch,
        "commit_hash": commit_hash,
        "commit_hash_short": commit_hash[:8],
        "commit_message": commit_message,
        "commit_author": commit_author,
        "commit_date": commit_date,
    }
list(results.keys())[0] data = results[file_name] + version_info = data.get("version_info") + if version_info: + with st.expander("Version Information", expanded=False): + self.render_version_info_content(version_info) + + st.header("Metrics overview") self.render_single_file_metrics(data) def render_single_file_metrics(self, data: Dict[str, Any]): @@ -1429,6 +1459,21 @@ def show_new_evaluation_page(self): # CI mode toggle ci_mode = st.checkbox("CI Mode", value=False, help="Run in CI mode with pass/fail thresholds") + # Version information + st.subheader("Version Information:") + version_description = st.text_area( + "Version Description (optional):", + placeholder="Describe what changes this evaluation version includes compared to previous ones...", + help="Add a description of what this evaluation version tests or what changes were made", + height=100, + ) + + include_git_info = st.checkbox( + "Include Git Information", + value=True, + help="Automatically include current branch, commit, and author information", + ) + # Output file name st.subheader("Output Configuration:") output_name = st.text_input( @@ -1445,6 +1490,8 @@ def show_new_evaluation_page(self): "categories": selected_categories, "ci_mode": ci_mode, "output_name": output_name, + "version_description": version_description, + "include_git_info": include_git_info, } st.rerun() else: @@ -1463,11 +1510,18 @@ def run_evaluation_launcher(self): with col1: st.write(f"**Categories:** {', '.join(params['categories'])}") st.write(f"**CI Mode:** {'Yes' if params['ci_mode'] else 'No'}") + st.write(f"**Include Git Info:** {'Yes' if params.get('include_git_info', True) else 'No'}") with col2: output_file = params["output_name"] if params["output_name"] else "Auto-generated timestamp" st.write(f"**Output File:** {output_file}") + version_desc = params.get("version_description", "").strip() + if version_desc: + st.write(f"**Version Description:** {version_desc[:50]}{'...' 
if len(version_desc) > 50 else ''}") + else: + st.write("**Version Description:** Not provided") + # Progress and status if "evaluation_running" not in st.session_state: st.session_state.evaluation_running = False @@ -1521,11 +1575,19 @@ def run_evaluation_process(self, params): if params["ci_mode"]: cmd.append("--ci") + # Add version description + version_desc = params.get("version_description", "").strip() + if version_desc: + cmd.extend(["--version-description", version_desc]) + + # Add git info setting + if not params.get("include_git_info", True): + cmd.append("--no-git-info") + # Add output file if params["output_name"]: - # Use absolute path to ensure it goes to the correct directory - output_path = Path(__file__).parent.parent / "evaluation_results" / params["output_name"] - cmd.extend(["--output", str(output_path)]) + # Just pass the filename, let main.py handle the directory + cmd.extend(["--output", params["output_name"]]) status_text.text("Starting evaluation process...") progress_bar.progress(10) diff --git a/rag_evaluation/main.py b/rag_evaluation/main.py index becf09f..1598926 100644 --- a/rag_evaluation/main.py +++ b/rag_evaluation/main.py @@ -182,8 +182,17 @@ def run_full_evaluation(self, categories: Optional[List[str]] = None) -> Dict[st overall_stats["overall_pass_rate"] = 0.0 # Store results + version_info = {} + if self.config.version_description: + version_info["description"] = self.config.version_description + + git_info = self.config.get_git_info() + if git_info: + version_info["git"] = git_info + self.results = { "timestamp": datetime.now().isoformat(), + "version_info": version_info if version_info else None, "config": { "ci_mode": self.config.ci_mode, "categories_run": selected_categories, @@ -250,6 +259,20 @@ def print_summary(self) -> None: print(f"\n{'=' * 60}") print("EVALUATION SUMMARY") print(f"{'=' * 60}") + + # Show version information if available + version_info = self.results.get("version_info") + if version_info: + if 
version_info.get("description"): + print(f"Version Description: {version_info['description']}") + + git_info = version_info.get("git") + if git_info: + print(f"Git Branch: {git_info['branch']}") + print(f"Last Commit: {git_info['commit_hash_short']} - {git_info['commit_message']}") + print(f"Commit Author: {git_info['commit_author']} ({git_info['commit_date']})") + print(f"{'-' * 60}") + print(f"Total Tests: {overall_stats.get('total_tests', 0)}") if self.config.ci_mode: @@ -329,6 +352,17 @@ def main(): action="store_true", help="Generate an HTML report after evaluation", ) + parser.add_argument( + "--version-description", + "--desc", + type=str, + help="Textual description of what this evaluation version adds compared to older ones", + ) + parser.add_argument( + "--no-git-info", + action="store_true", + help="Disable automatic git information collection", + ) args = parser.parse_args() try: @@ -336,6 +370,10 @@ def main(): config = EvaluationConfig() if args.ci: config.ci_mode = True + if args.version_description: + config.version_description = args.version_description + if args.no_git_info: + config.include_git_info = False # Create evaluation manager manager = EvaluationManager(config)