Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 76 additions & 1 deletion rag_evaluation/config/evaluation_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
"""

import json
import subprocess
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Any
from typing import Dict, List, Any, Optional


@dataclass
Expand All @@ -27,6 +28,10 @@ class EvaluationConfig:
ci_mode: bool = False
fail_on_threshold: bool = True

# Version information
version_description: str = ""
include_git_info: bool = True

# Evaluators configuration (loaded dynamically)
_evaluators_config: Dict[str, Any] = field(default_factory=dict, init=False)

Expand Down Expand Up @@ -81,3 +86,73 @@ def get_dataset_path(self, category: str) -> Path:
def get_ci_threshold(self) -> float:
    """Return the configured ``ci_threshold`` (only used in CI mode)."""
    threshold = self.ci_threshold
    return threshold

def get_git_info(self) -> Optional[Dict[str, str]]:
    """Collect metadata about the current git checkout.

    Git is invoked from the directory three levels above this file.

    Returns:
        ``None`` when ``include_git_info`` is false, when git cannot be
        launched, or when a git command fails (for example outside a
        repository). Otherwise a dict with the keys ``branch``,
        ``commit_hash``, ``commit_hash_short`` (first 8 characters of the
        hash), ``commit_message``, ``commit_author`` and ``commit_date``.
    """
    if not self.include_git_info:
        return None

    # Resolve the working directory once instead of once per command.
    repo_dir = Path(__file__).parent.parent.parent

    def run_git(*args: str) -> str:
        """Run ``git <args>`` in ``repo_dir`` and return its stripped stdout."""
        raw = subprocess.check_output(
            ["git", *args],
            cwd=repo_dir,
            stderr=subprocess.DEVNULL,
        )
        # Decode leniently: a commit subject or author name stored in a
        # legacy encoding must not abort the run with UnicodeDecodeError.
        return raw.decode("utf-8", errors="replace").strip()

    try:
        branch = run_git("rev-parse", "--abbrev-ref", "HEAD")
        # A single `git log` call yields hash, subject, author and date.
        # NUL (%x00) is used as separator because none of these fields can
        # contain it.
        log_line = run_git("log", "-1", "--pretty=format:%H%x00%s%x00%an%x00%ci")
    except (subprocess.CalledProcessError, OSError):
        # Git not available, not executable, or not in a git repository.
        return None

    fields = [part.strip() for part in log_line.split("\x00")]
    if len(fields) != 4:
        # Unexpected output shape; treat like any other git failure.
        return None

    commit_hash, commit_message, commit_author, commit_date = fields
    return {
        "branch": branch,
        "commit_hash": commit_hash,
        "commit_hash_short": commit_hash[:8],
        "commit_message": commit_message,
        "commit_author": commit_author,
        "commit_date": commit_date,
    }
70 changes: 66 additions & 4 deletions rag_evaluation/evaluation_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,30 @@ def render_feedback_analysis(self, _result: Dict[str, Any], eval_details: Dict[s

st.markdown("</div>", unsafe_allow_html=True)

def render_version_info_content(self, version_info: Dict[str, Any]):
    """Write the version description and git details via ``st.markdown``.

    No outer container is created here; the caller supplies any wrapper.
    Entries that are missing or empty in ``version_info`` are skipped.
    """
    text = version_info.get("description")
    if text:
        st.markdown(f"**Description:** {text}")

    git = version_info.get("git")
    if not git:
        return

    # Branch and short hash sit side by side in two columns.
    branch_col, commit_col = st.columns(2)
    with branch_col:
        st.markdown(f"**Git Branch:** `{git.get('branch', 'N/A')}`")
    with commit_col:
        st.markdown(f"**Commit:** `{git.get('commit_hash_short', 'N/A')}`")

    st.markdown(f"**Last Commit:** {git.get('commit_message', 'N/A')}")

    author = git.get("commit_author", "N/A")
    date = git.get("commit_date", "N/A")
    st.markdown(f"**Author:** {author} • **Date:** {date}")

def render_sidebar(self):
"""Render sidebar with file selection"""
# File selection
Expand Down Expand Up @@ -639,10 +663,16 @@ def render_sidebar(self):

def render_overview_metrics(self, results: Dict[str, Dict[str, Any]]):
    """Render overview metrics for the first of the selected files.

    If that file's data carries a truthy ``version_info`` entry, it is
    shown first inside a collapsed expander; the "Metrics overview" header
    and the single-file metrics follow.

    Args:
        results: Mapping of result file name to its loaded data. Only the
            first entry is rendered.
    """
    if not results:
        # Nothing selected: show the header with a notice instead of
        # failing on the first-entry lookup.
        st.header("Metrics overview")
        st.info("No evaluation results selected.")
        return

    data = next(iter(results.values()))

    # Display version information first, if available (collapsible).
    version_info = data.get("version_info")
    if version_info:
        with st.expander("Version Information", expanded=False):
            self.render_version_info_content(version_info)

    # The header is emitted exactly once, after the version expander.
    st.header("Metrics overview")
    self.render_single_file_metrics(data)

def render_single_file_metrics(self, data: Dict[str, Any]):
Expand Down Expand Up @@ -1429,6 +1459,21 @@ def show_new_evaluation_page(self):
# CI mode toggle
ci_mode = st.checkbox("CI Mode", value=False, help="Run in CI mode with pass/fail thresholds")

# Version information
st.subheader("Version Information:")
version_description = st.text_area(
"Version Description (optional):",
placeholder="Describe what changes this evaluation version includes compared to previous ones...",
help="Add a description of what this evaluation version tests or what changes were made",
height=100,
)

include_git_info = st.checkbox(
"Include Git Information",
value=True,
help="Automatically include current branch, commit, and author information",
)

# Output file name
st.subheader("Output Configuration:")
output_name = st.text_input(
Expand All @@ -1445,6 +1490,8 @@ def show_new_evaluation_page(self):
"categories": selected_categories,
"ci_mode": ci_mode,
"output_name": output_name,
"version_description": version_description,
"include_git_info": include_git_info,
}
st.rerun()
else:
Expand All @@ -1463,11 +1510,18 @@ def run_evaluation_launcher(self):
with col1:
st.write(f"**Categories:** {', '.join(params['categories'])}")
st.write(f"**CI Mode:** {'Yes' if params['ci_mode'] else 'No'}")
st.write(f"**Include Git Info:** {'Yes' if params.get('include_git_info', True) else 'No'}")

with col2:
output_file = params["output_name"] if params["output_name"] else "Auto-generated timestamp"
st.write(f"**Output File:** {output_file}")

version_desc = params.get("version_description", "").strip()
if version_desc:
st.write(f"**Version Description:** {version_desc[:50]}{'...' if len(version_desc) > 50 else ''}")
else:
st.write("**Version Description:** Not provided")

# Progress and status
if "evaluation_running" not in st.session_state:
st.session_state.evaluation_running = False
Expand Down Expand Up @@ -1521,11 +1575,19 @@ def run_evaluation_process(self, params):
if params["ci_mode"]:
cmd.append("--ci")

# Add version description
version_desc = params.get("version_description", "").strip()
if version_desc:
cmd.extend(["--version-description", version_desc])

# Add git info setting
if not params.get("include_git_info", True):
cmd.append("--no-git-info")

# Add output file
if params["output_name"]:
# Use absolute path to ensure it goes to the correct directory
output_path = Path(__file__).parent.parent / "evaluation_results" / params["output_name"]
cmd.extend(["--output", str(output_path)])
# Just pass the filename, let main.py handle the directory
cmd.extend(["--output", params["output_name"]])

status_text.text("Starting evaluation process...")
progress_bar.progress(10)
Expand Down
38 changes: 38 additions & 0 deletions rag_evaluation/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,17 @@ def run_full_evaluation(self, categories: Optional[List[str]] = None) -> Dict[st
overall_stats["overall_pass_rate"] = 0.0

# Store results
version_info = {}
if self.config.version_description:
version_info["description"] = self.config.version_description

git_info = self.config.get_git_info()
if git_info:
version_info["git"] = git_info

self.results = {
"timestamp": datetime.now().isoformat(),
"version_info": version_info if version_info else None,
"config": {
"ci_mode": self.config.ci_mode,
"categories_run": selected_categories,
Expand Down Expand Up @@ -250,6 +259,20 @@ def print_summary(self) -> None:
print(f"\n{'=' * 60}")
print("EVALUATION SUMMARY")
print(f"{'=' * 60}")

# Show version information if available
version_info = self.results.get("version_info")
if version_info:
if version_info.get("description"):
print(f"Version Description: {version_info['description']}")

git_info = version_info.get("git")
if git_info:
print(f"Git Branch: {git_info['branch']}")
print(f"Last Commit: {git_info['commit_hash_short']} - {git_info['commit_message']}")
print(f"Commit Author: {git_info['commit_author']} ({git_info['commit_date']})")
print(f"{'-' * 60}")

print(f"Total Tests: {overall_stats.get('total_tests', 0)}")

if self.config.ci_mode:
Expand Down Expand Up @@ -329,13 +352,28 @@ def main():
action="store_true",
help="Generate an HTML report after evaluation",
)
parser.add_argument(
"--version-description",
"--desc",
type=str,
help="Textual description of what this evaluation version adds compared to older ones",
)
parser.add_argument(
"--no-git-info",
action="store_true",
help="Disable automatic git information collection",
)
args = parser.parse_args()

try:
# Load configuration
config = EvaluationConfig()
if args.ci:
config.ci_mode = True
if args.version_description:
config.version_description = args.version_description
if args.no_git_info:
config.include_git_info = False

# Create evaluation manager
manager = EvaluationManager(config)
Expand Down