diff --git a/tools/harness/src/nv_ingest_harness/cases/recall.py b/tools/harness/src/nv_ingest_harness/cases/recall.py index 62c4fd1c5..1ffb592bd 100644 --- a/tools/harness/src/nv_ingest_harness/cases/recall.py +++ b/tools/harness/src/nv_ingest_harness/cases/recall.py @@ -5,6 +5,7 @@ import json import os import time +import traceback from typing import Callable, Dict, Tuple from nv_ingest_harness.utils.interact import embed_info, kv_event_log @@ -123,27 +124,29 @@ def main(config=None, log_path: str = "test_results") -> int: if lancedb_path: print(f"Using LanceDB at: {lancedb_path}") - try: - recall_results = {} - - # Prepare evaluation parameters - evaluation_params = { - "hostname": hostname, - "sparse": sparse, - "model_name": model_name, - "top_k": recall_top_k, - "gpu_search": gpu_search, - "ground_truth_dir": ground_truth_dir, - "vdb_backend": vdb_backend, - "nv_ranker_endpoint": f"http://{hostname}:8020/v1/ranking", - "nv_ranker_model_name": "nvidia/llama-3.2-nv-rerankqa-1b-v2", - } - if vdb_backend == "lancedb": - evaluation_params["sparse"] = False # LanceDB doesn't support hybrid search - evaluation_params["table_path"] = lancedb_path - - # Run without reranker (if mode is "none" or "both") - if reranker_mode in ["none", "both"]: + recall_results = {} + exit_code = 0 + + local_reranker_endpoint = f"http://{hostname}:8020/v1/ranking" + build_reranker_endpoint = "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking" + + evaluation_params = { + "hostname": hostname, + "sparse": sparse, + "model_name": model_name, + "top_k": recall_top_k, + "gpu_search": gpu_search, + "ground_truth_dir": ground_truth_dir, + "vdb_backend": vdb_backend, + "nv_ranker_endpoint": local_reranker_endpoint, + "nv_ranker_model_name": "nvidia/llama-3.2-nv-rerankqa-1b-v2", + } + if vdb_backend == "lancedb": + evaluation_params["sparse"] = False + evaluation_params["table_path"] = lancedb_path + + if reranker_mode in ["none", "both"]: + try: scores, _ = evaluate_recall_with_reranker( evaluator=evaluator, collection_name=collection_name, @@ -152,9 +155,13 @@ def main(config=None, log_path: str = "test_results") -> int: log_path=log_path, ) recall_results["no_reranker"] = scores + except Exception as e: + print(f"ERROR: Recall evaluation (without reranker) failed: {e}") + traceback.print_exc() + exit_code = 1 - # Run with reranker (if mode is "with" or "both") - if reranker_mode in ["with", "both"]: + if reranker_mode in ["with", "both"]: + try: scores, _ = evaluate_recall_with_reranker( evaluator=evaluator, collection_name=collection_name, @@ -163,32 +170,45 @@ def main(config=None, log_path: str = "test_results") -> int: log_path=log_path, ) recall_results["with_reranker"] = scores + except Exception as e: + if "connection failed" in str(e).lower(): + print("Local reranker unavailable, falling back to build.nvidia endpoint") + evaluation_params["nv_ranker_endpoint"] = build_reranker_endpoint + try: + scores, _ = evaluate_recall_with_reranker( + evaluator=evaluator, + collection_name=collection_name, + evaluation_params=evaluation_params, + use_reranker=True, + log_path=log_path, + ) + recall_results["with_reranker"] = scores + except Exception as fallback_e: + print(f"ERROR: Reranker fallback failed: {fallback_e}") + traceback.print_exc() + exit_code = 1 + else: + print(f"ERROR: Recall evaluation (with reranker) failed: {e}") + traceback.print_exc() + exit_code = 1 + + results_file = os.path.join(log_path, "_test_results.json") + test_results = { + "test_type": "recall", + "dataset": recall_dataset, + "test_name": test_name, + "collection_name": collection_name, + "reranker_mode": reranker_mode, + "recall_results": recall_results, + } + with open(results_file, "w") as f: + json.dump(test_results, f, indent=2) - # Save results - results_file = os.path.join(log_path, "_test_results.json") - test_results = { - "test_type": "recall", - "dataset": recall_dataset, - "test_name": test_name, - "collection_name": collection_name, - "reranker_mode": reranker_mode, - "recall_results": recall_results, - } - with open(results_file, "w") as f: - json.dump(test_results, f, indent=2) - - print("\n" + "=" * 60) - print("Recall Evaluation Complete") - print("=" * 60) - - return 0 - - except Exception as e: - print(f"ERROR: Recall evaluation failed: {e}") - import traceback - - traceback.print_exc() - return 1 + print("\n" + "=" * 60) + print("Recall Evaluation Complete" if exit_code == 0 else "Recall Evaluation Completed with Errors") + print("=" * 60) + + return exit_code if __name__ == "__main__":