Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 68 additions & 48 deletions tools/harness/src/nv_ingest_harness/cases/recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import os
import time
import traceback
from typing import Callable, Dict, Tuple

from nv_ingest_harness.utils.interact import embed_info, kv_event_log
Expand Down Expand Up @@ -123,27 +124,29 @@ def main(config=None, log_path: str = "test_results") -> int:
if lancedb_path:
print(f"Using LanceDB at: {lancedb_path}")

try:
recall_results = {}

# Prepare evaluation parameters
evaluation_params = {
"hostname": hostname,
"sparse": sparse,
"model_name": model_name,
"top_k": recall_top_k,
"gpu_search": gpu_search,
"ground_truth_dir": ground_truth_dir,
"vdb_backend": vdb_backend,
"nv_ranker_endpoint": f"http://{hostname}:8020/v1/ranking",
"nv_ranker_model_name": "nvidia/llama-3.2-nv-rerankqa-1b-v2",
}
if vdb_backend == "lancedb":
evaluation_params["sparse"] = False # LanceDB doesn't support hybrid search
evaluation_params["table_path"] = lancedb_path

# Run without reranker (if mode is "none" or "both")
if reranker_mode in ["none", "both"]:
recall_results = {}
exit_code = 0

local_reranker_endpoint = f"http://{hostname}:8020/v1/ranking"
build_reranker_endpoint = "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking"

evaluation_params = {
"hostname": hostname,
"sparse": sparse,
"model_name": model_name,
"top_k": recall_top_k,
"gpu_search": gpu_search,
"ground_truth_dir": ground_truth_dir,
"vdb_backend": vdb_backend,
"nv_ranker_endpoint": local_reranker_endpoint,
"nv_ranker_model_name": "nvidia/llama-3.2-nv-rerankqa-1b-v2",
}
if vdb_backend == "lancedb":
evaluation_params["sparse"] = False
evaluation_params["table_path"] = lancedb_path

if reranker_mode in ["none", "both"]:
try:
scores, _ = evaluate_recall_with_reranker(
evaluator=evaluator,
collection_name=collection_name,
Expand All @@ -152,9 +155,13 @@ def main(config=None, log_path: str = "test_results") -> int:
log_path=log_path,
)
recall_results["no_reranker"] = scores
except Exception as e:
print(f"ERROR: Recall evaluation (without reranker) failed: {e}")
traceback.print_exc()
exit_code = 1

# Run with reranker (if mode is "with" or "both")
if reranker_mode in ["with", "both"]:
if reranker_mode in ["with", "both"]:
try:
scores, _ = evaluate_recall_with_reranker(
evaluator=evaluator,
collection_name=collection_name,
Expand All @@ -163,32 +170,45 @@ def main(config=None, log_path: str = "test_results") -> int:
log_path=log_path,
)
recall_results["with_reranker"] = scores
except Exception as e:
if "connection failed" in str(e).lower():
print("Local reranker unavailable, falling back to build.nvidia endpoint")
evaluation_params["nv_ranker_endpoint"] = build_reranker_endpoint
try:
scores, _ = evaluate_recall_with_reranker(
evaluator=evaluator,
collection_name=collection_name,
evaluation_params=evaluation_params,
use_reranker=True,
log_path=log_path,
)
recall_results["with_reranker"] = scores
except Exception as fallback_e:
print(f"ERROR: Reranker fallback failed: {fallback_e}")
traceback.print_exc()
exit_code = 1
else:
print(f"ERROR: Recall evaluation (with reranker) failed: {e}")
traceback.print_exc()
exit_code = 1

results_file = os.path.join(log_path, "_test_results.json")
test_results = {
"test_type": "recall",
"dataset": recall_dataset,
"test_name": test_name,
"collection_name": collection_name,
"reranker_mode": reranker_mode,
"recall_results": recall_results,
}
with open(results_file, "w") as f:
json.dump(test_results, f, indent=2)

# Save results
results_file = os.path.join(log_path, "_test_results.json")
test_results = {
"test_type": "recall",
"dataset": recall_dataset,
"test_name": test_name,
"collection_name": collection_name,
"reranker_mode": reranker_mode,
"recall_results": recall_results,
}
with open(results_file, "w") as f:
json.dump(test_results, f, indent=2)

print("\n" + "=" * 60)
print("Recall Evaluation Complete")
print("=" * 60)

return 0

except Exception as e:
print(f"ERROR: Recall evaluation failed: {e}")
import traceback

traceback.print_exc()
return 1
print("\n" + "=" * 60)
print("Recall Evaluation Complete" if exit_code == 0 else "Recall Evaluation Completed with Errors")
print("=" * 60)

return exit_code


if __name__ == "__main__":
Expand Down
Loading