From 745beffe1ea2ae03cea3f0f4173d8420beb4f973 Mon Sep 17 00:00:00 2001
From: Jacob Ioffe <jioffe@nvidia.com>
Date: Tue, 20 Jan 2026 21:00:58 +0000
Subject: [PATCH 1/2] Fix recall harness writing no results on evaluation failure

---
 .../src/nv_ingest_harness/cases/recall.py     | 98 ++++++++++---------
 1 file changed, 50 insertions(+), 48 deletions(-)

diff --git a/tools/harness/src/nv_ingest_harness/cases/recall.py b/tools/harness/src/nv_ingest_harness/cases/recall.py
index 62c4fd1c5..985fa55bc 100644
--- a/tools/harness/src/nv_ingest_harness/cases/recall.py
+++ b/tools/harness/src/nv_ingest_harness/cases/recall.py
@@ -5,6 +5,7 @@
 import json
 import os
 import time
+import traceback
 from typing import Callable, Dict, Tuple
 
 from nv_ingest_harness.utils.interact import embed_info, kv_event_log
@@ -123,27 +124,28 @@ def main(config=None, log_path: str = "test_results") -> int:
     if lancedb_path:
         print(f"Using LanceDB at: {lancedb_path}")
 
-    try:
-        recall_results = {}
-
-        # Prepare evaluation parameters
-        evaluation_params = {
-            "hostname": hostname,
-            "sparse": sparse,
-            "model_name": model_name,
-            "top_k": recall_top_k,
-            "gpu_search": gpu_search,
-            "ground_truth_dir": ground_truth_dir,
-            "vdb_backend": vdb_backend,
-            "nv_ranker_endpoint": f"http://{hostname}:8020/v1/ranking",
-            "nv_ranker_model_name": "nvidia/llama-3.2-nv-rerankqa-1b-v2",
-        }
-        if vdb_backend == "lancedb":
-            evaluation_params["sparse"] = False  # LanceDB doesn't support hybrid search
-            evaluation_params["table_path"] = lancedb_path
-
-        # Run without reranker (if mode is "none" or "both")
-        if reranker_mode in ["none", "both"]:
+    recall_results = {}
+    exit_code = 0
+
+    # Prepare evaluation parameters
+    evaluation_params = {
+        "hostname": hostname,
+        "sparse": sparse,
+        "model_name": model_name,
+        "top_k": recall_top_k,
+        "gpu_search": gpu_search,
+        "ground_truth_dir": ground_truth_dir,
+        "vdb_backend": vdb_backend,
+        "nv_ranker_endpoint": f"http://{hostname}:8020/v1/ranking",
+        "nv_ranker_model_name": "nvidia/llama-3.2-nv-rerankqa-1b-v2",
+    }
+    if vdb_backend == "lancedb":
+        evaluation_params["sparse"] = False  # LanceDB doesn't support hybrid search
+        evaluation_params["table_path"] = lancedb_path
+
+    # Run without reranker (if mode is "none" or "both")
+    if reranker_mode in ["none", "both"]:
+        try:
             scores, _ = evaluate_recall_with_reranker(
                 evaluator=evaluator,
                 collection_name=collection_name,
@@ -152,9 +154,13 @@ def main(config=None, log_path: str = "test_results") -> int:
                 log_path=log_path,
             )
             recall_results["no_reranker"] = scores
+        except Exception as e:
+            print(f"ERROR: Recall evaluation (without reranker) failed: {e}")
+            traceback.print_exc()
+            exit_code = 1
 
-        # Run with reranker (if mode is "with" or "both")
-        if reranker_mode in ["with", "both"]:
+    if reranker_mode in ["with", "both"]:
+        try:
             scores, _ = evaluate_recall_with_reranker(
                 evaluator=evaluator,
                 collection_name=collection_name,
@@ -163,32 +169,28 @@ def main(config=None, log_path: str = "test_results") -> int:
                 log_path=log_path,
             )
             recall_results["with_reranker"] = scores
+        except Exception as e:
+            print(f"ERROR: Recall evaluation (with reranker) failed: {e}")
+            traceback.print_exc()
+            exit_code = 1
+
+    results_file = os.path.join(log_path, "_test_results.json")
+    test_results = {
+        "test_type": "recall",
+        "dataset": recall_dataset,
+        "test_name": test_name,
+        "collection_name": collection_name,
+        "reranker_mode": reranker_mode,
+        "recall_results": recall_results,
+    }
+    with open(results_file, "w") as f:
+        json.dump(test_results, f, indent=2)
 
-        # Save results
-        results_file = os.path.join(log_path, "_test_results.json")
-        test_results = {
-            "test_type": "recall",
-            "dataset": recall_dataset,
-            "test_name": test_name,
-            "collection_name": collection_name,
-            "reranker_mode": reranker_mode,
-            "recall_results": recall_results,
-        }
-        with open(results_file, "w") as f:
-            json.dump(test_results, f, indent=2)
-
-        print("\n" + "=" * 60)
-        print("Recall Evaluation Complete")
-        print("=" * 60)
-
-        return 0
-
-    except Exception as e:
-        print(f"ERROR: Recall evaluation failed: {e}")
-        import traceback
-
-        traceback.print_exc()
-        return 1
+    print("\n" + "=" * 60)
+    print("Recall Evaluation Complete" if exit_code == 0 else "Recall Evaluation Completed with Errors")
+    print("=" * 60)
+
+    return exit_code
 
 
 if __name__ == "__main__":

From b2c5a390986cfe5c881a8ec67eac333e7acd2e4e Mon Sep 17 00:00:00 2001
From: Jacob Ioffe <jioffe@nvidia.com>
Date: Tue, 20 Jan 2026 21:21:32 +0000
Subject: [PATCH 2/2] Fall back to build.nvidia reranker when local endpoint is unavailable

---
 .../src/nv_ingest_harness/cases/recall.py     | 32 +++++++++++++++----
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/tools/harness/src/nv_ingest_harness/cases/recall.py b/tools/harness/src/nv_ingest_harness/cases/recall.py
index 985fa55bc..1ffb592bd 100644
--- a/tools/harness/src/nv_ingest_harness/cases/recall.py
+++ b/tools/harness/src/nv_ingest_harness/cases/recall.py
@@ -127,7 +127,9 @@ def main(config=None, log_path: str = "test_results") -> int:
     recall_results = {}
     exit_code = 0
 
-    # Prepare evaluation parameters
+    local_reranker_endpoint = f"http://{hostname}:8020/v1/ranking"
+    build_reranker_endpoint = "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking"
+
     evaluation_params = {
         "hostname": hostname,
         "sparse": sparse,
@@ -136,14 +138,13 @@ def main(config=None, log_path: str = "test_results") -> int:
         "gpu_search": gpu_search,
         "ground_truth_dir": ground_truth_dir,
         "vdb_backend": vdb_backend,
-        "nv_ranker_endpoint": f"http://{hostname}:8020/v1/ranking",
+        "nv_ranker_endpoint": local_reranker_endpoint,
         "nv_ranker_model_name": "nvidia/llama-3.2-nv-rerankqa-1b-v2",
     }
     if vdb_backend == "lancedb":
-        evaluation_params["sparse"] = False  # LanceDB doesn't support hybrid search
+        evaluation_params["sparse"] = False
         evaluation_params["table_path"] = lancedb_path
 
-    # Run without reranker (if mode is "none" or "both")
     if reranker_mode in ["none", "both"]:
         try:
             scores, _ = evaluate_recall_with_reranker(
@@ -170,9 +171,26 @@ def main(config=None, log_path: str = "test_results") -> int:
             )
             recall_results["with_reranker"] = scores
         except Exception as e:
-            print(f"ERROR: Recall evaluation (with reranker) failed: {e}")
-            traceback.print_exc()
-            exit_code = 1
+            if "connection failed" in str(e).lower():
+                print("Local reranker unavailable, falling back to build.nvidia endpoint")
+                evaluation_params["nv_ranker_endpoint"] = build_reranker_endpoint
+                try:
+                    scores, _ = evaluate_recall_with_reranker(
+                        evaluator=evaluator,
+                        collection_name=collection_name,
+                        evaluation_params=evaluation_params,
+                        use_reranker=True,
+                        log_path=log_path,
+                    )
+                    recall_results["with_reranker"] = scores
+                except Exception as fallback_e:
+                    print(f"ERROR: Reranker fallback failed: {fallback_e}")
+                    traceback.print_exc()
+                    exit_code = 1
+            else:
+                print(f"ERROR: Recall evaluation (with reranker) failed: {e}")
+                traceback.print_exc()
+                exit_code = 1
 
     results_file = os.path.join(log_path, "_test_results.json")
     test_results = {