From c92641ac4c6097a931b2669d596febe16bd8ae84 Mon Sep 17 00:00:00 2001
From: Harry Xie
Date: Thu, 7 Nov 2024 11:35:38 +0000
Subject: [PATCH 1/2] use run_stats[1] to account for new run stats sorting
 (with latest first); formatting

---
 src/humanloop/eval_utils.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/humanloop/eval_utils.py b/src/humanloop/eval_utils.py
index b13d6c3b..8ff2cf8a 100644
--- a/src/humanloop/eval_utils.py
+++ b/src/humanloop/eval_utils.py
@@ -233,16 +233,16 @@ def _run_eval(
         raise NotImplementedError(f"Unsupported File type: {type_}")
 
     # Upsert the Dataset
-    action = dataset.get("action", "set")  # set is the server default - None not allowed.
+    action = dataset.get(
+        "action", "set"
+    )  # set is the server default - None not allowed.
     if "datapoints" not in dataset:
         dataset["datapoints"] = []
         # Use `upsert` to get existing dataset ID if no datapoints provided, given we can't `get` on path.
         action = "add"
     hl_dataset = client.datasets.upsert(**dataset, action=action)
     hl_dataset = client.datasets.get(
-        id=hl_dataset.id,
-        version_id=hl_dataset.version_id,
-        include_datapoints=True
+        id=hl_dataset.id, version_id=hl_dataset.version_id, include_datapoints=True
     )
 
     # Upsert the local Evaluators; other Evaluators are just referenced by `path` or `id`
@@ -615,7 +615,8 @@ def check_evaluation_improvement(
         return True, 0, 0
 
     previous_evaluator_stats_by_path = get_evaluator_stats_by_path(
-        stat=stats.run_stats[-2], evaluation=evaluation
+        stat=stats.run_stats[1],  # Latest Run is at index 0; previous Run is at index 1
+        evaluation=evaluation,
     )
     if (
         evaluator_path in latest_evaluator_stats_by_path
@@ -625,6 +626,8 @@
         previous_evaluator_stat = previous_evaluator_stats_by_path[evaluator_path]
         latest_score = get_score_from_evaluator_stat(stat=latest_evaluator_stat)
         previous_score = get_score_from_evaluator_stat(stat=previous_evaluator_stat)
+        if latest_score is None or previous_score is None:
+            raise ValueError(f"Could not find score for Evaluator {evaluator_path}.")
         diff = round(latest_score - previous_score, 2)
         if diff >= 0:
             logger.info(

From 5021e53a15c2fc6278d0c082e34e994a46e87262 Mon Sep 17 00:00:00 2001
From: Harry Xie
Date: Thu, 7 Nov 2024 11:47:36 +0000
Subject: [PATCH 2/2] use run-level status

---
 src/humanloop/eval_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/humanloop/eval_utils.py b/src/humanloop/eval_utils.py
index 8ff2cf8a..1eb09e2f 100644
--- a/src/humanloop/eval_utils.py
+++ b/src/humanloop/eval_utils.py
@@ -422,7 +422,11 @@ def process_datapoint(datapoint: Datapoint):
     while not complete:
         stats = client.evaluations.get_stats(id=evaluation.id)
         logger.info(f"\r{stats.progress}")
-        complete = stats.status == "completed"
+        run_stats = next(
+            (run_stats for run_stats in stats.run_stats if run_stats.run_id == run_id),
+            None,
+        )
+        complete = run_stats is not None and run_stats.status == "completed"
        if not complete:
             time.sleep(5)
 
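Note on PATCH 1/2: the server now returns run_stats sorted newest-first, so the
previous Run moved from run_stats[-2] to run_stats[1]. Below is a minimal
standalone sketch of that comparison logic, including the None-score guard the
patch adds. The RunStats dataclass and its score field are illustrative
stand-ins, not the Humanloop SDK's stats types.

# Sketch (not SDK code): run_stats is assumed sorted newest-first, so the
# latest Run sits at index 0 and the previous Run at index 1.
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class RunStats:
    run_id: str
    score: Optional[float]


def score_diff(run_stats: List[RunStats]) -> float:
    """Latest score minus previous score, assuming newest-first ordering."""
    if len(run_stats) < 2:
        raise ValueError("Need at least two Runs to compare.")
    latest, previous = run_stats[0], run_stats[1]
    # Mirrors the guard added in the patch: fail with a clear error rather
    # than a TypeError from arithmetic on None.
    if latest.score is None or previous.score is None:
        raise ValueError("Could not find score for a Run.")
    return round(latest.score - previous.score, 2)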
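Note on PATCH 2/2: the Evaluation-level status covers every Run in the
Evaluation, so the loop now waits on the status of the specific Run this
invocation created. A standalone sketch of the same polling pattern follows;
the client argument is a hypothetical stand-in exposing the
evaluations.get_stats shape used in the diff, with run_stats entries carrying
run_id and status.

# Sketch (not SDK code): poll until the given Run reports "completed",
# ignoring the completion state of any other Runs in the Evaluation.
import time


def wait_for_run_completion(client, evaluation_id: str, run_id: str) -> None:
    complete = False
    while not complete:
        stats = client.evaluations.get_stats(id=evaluation_id)
        # Find the stats entry for our Run; None if the server has not
        # reported it yet.
        run_stats = next(
            (rs for rs in stats.run_stats if rs.run_id == run_id),
            None,
        )
        complete = run_stats is not None and run_stats.status == "completed"
        if not complete:
            time.sleep(5)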