From 19f86f811d2ab1fb7083dbff4d4b1f5098b9f5d0 Mon Sep 17 00:00:00 2001 From: SJTUyh Date: Thu, 5 Mar 2026 18:54:57 +0800 Subject: [PATCH 1/2] bugfix for judge infer --- ais_bench/benchmark/cli/workers.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ais_bench/benchmark/cli/workers.py b/ais_bench/benchmark/cli/workers.py index 4c28f174..cec82cc4 100644 --- a/ais_bench/benchmark/cli/workers.py +++ b/ais_bench/benchmark/cli/workers.py @@ -115,8 +115,19 @@ def _update_tasks_cfg(self, tasks, cfg: ConfigDict): class JudgeInfer(BaseWorker): def update_cfg(self, cfg: ConfigDict) -> None: + self.judge_model_type = None + + for dataset_cfg in cfg["datasets"]: + judge_infer_cfg = dataset_cfg.get("judge_infer_cfg") + if judge_infer_cfg: + self.judge_model_type = judge_infer_cfg["judge_model"]["attr"] + + if self.judge_model_type is None: + logger.debug("Skip Judge Infer") + return cfg + def get_task_type() -> str: - if cfg["datasets"][0]["judge_infer_cfg"]["judge_model"]["attr"] == "service": + if self.judge_model_type == "service": return get_config_type(OpenICLApiInferTask) else: return get_config_type(OpenICLInferTask) @@ -141,6 +152,10 @@ def get_task_type() -> str: return cfg def do_work(self, cfg: ConfigDict): + if self.judge_model_type is None: + logger.debug("Skip Judge Infer") + return + partitioner = PARTITIONERS.build(cfg.judge_infer.partitioner) logger.info("Starting inference tasks...") self._cfg_pre_process(cfg) From f1b2d929f1248115c7638b88b4ce598e412608a4 Mon Sep 17 00:00:00 2001 From: SJTUyh Date: Thu, 5 Mar 2026 19:13:40 +0800 Subject: [PATCH 2/2] bugfix for judge infer --- ais_bench/benchmark/cli/workers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ais_bench/benchmark/cli/workers.py b/ais_bench/benchmark/cli/workers.py index cec82cc4..ce1dd8bb 100644 --- a/ais_bench/benchmark/cli/workers.py +++ b/ais_bench/benchmark/cli/workers.py @@ -114,9 +114,11 @@ def _update_tasks_cfg(self, tasks, cfg: ConfigDict): class JudgeInfer(BaseWorker): - def update_cfg(self, cfg: ConfigDict) -> None: + def __init__(self, args) -> None: + super().__init__(args) self.judge_model_type = None + def update_cfg(self, cfg: ConfigDict) -> None: for dataset_cfg in cfg["datasets"]: judge_infer_cfg = dataset_cfg.get("judge_infer_cfg") if judge_infer_cfg: