32 changes: 22 additions & 10 deletions api/oss/src/core/evaluators/service.py
@@ -1,13 +1,9 @@
from typing import Optional, List
from uuid import UUID, uuid4
from json import loads

from oss.src.utils.helpers import get_slug_from_name_and_id
from oss.src.services.db_manager import fetch_evaluator_config
from oss.src.core.workflows.dtos import (
WorkflowFlags,
WorkflowQueryFlags,
#
WorkflowCreate,
WorkflowEdit,
WorkflowQuery,
@@ -17,8 +13,6 @@
WorkflowVariantEdit,
WorkflowVariantQuery,
#
WorkflowRevisionData,
#
WorkflowRevisionCreate,
WorkflowRevisionEdit,
WorkflowRevisionCommit,
@@ -35,11 +29,7 @@
SimpleEvaluatorEdit,
SimpleEvaluatorQuery,
SimpleEvaluatorFlags,
SimpleEvaluatorQueryFlags,
#
EvaluatorFlags,
EvaluatorQueryFlags,
#
Evaluator,
EvaluatorQuery,
EvaluatorRevisionsLog,
@@ -1435,11 +1425,33 @@ def _transfer_evaluator_revision_data(
else None
)
headers = None
# TODO: This function reconstructs output schemas from old evaluator settings.
# When fully migrating to the new workflow-based evaluator system, the output
# schema should be stored directly in the evaluator revision (workflow revision)
# at configuration time, rather than being inferred from settings here.
# For evaluators with dynamic outputs (auto_ai_critique, json_multi_field_match),
# the frontend/API should build and save the complete output schema when the
# user configures the evaluator.
outputs_schema = None
if str(old_evaluator.evaluator_key) == "auto_ai_critique":
json_schema = old_evaluator.settings_values.get("json_schema", None)
if json_schema and isinstance(json_schema, dict):
outputs_schema = json_schema.get("schema", None)
# Handle json_multi_field_match with dynamic field-based properties
if str(old_evaluator.evaluator_key) == "json_multi_field_match":
# Build dynamic properties based on configured fields
fields = old_evaluator.settings_values.get("fields", [])
properties = {"aggregate_score": {"type": "number"}}
for field in fields:
# Each field becomes a numeric score (0 or 1)
properties[field] = {"type": "number"}
outputs_schema = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"properties": properties,
"required": ["aggregate_score"],
"additionalProperties": False,
}
if not outputs_schema:
properties = (
{"score": {"type": "number"}, "success": {"type": "boolean"}}
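As an aside, a minimal sketch of what the reconstruction above would produce for a hypothetical json_multi_field_match evaluator configured with two fields (the field names below are invented for illustration, not taken from this PR):

# Hypothetical old-style settings for a json_multi_field_match evaluator.
settings_values = {"fields": ["user.name", "user.age"]}

# Mirrors the schema construction in _transfer_evaluator_revision_data.
properties = {"aggregate_score": {"type": "number"}}
for field in settings_values["fields"]:
    properties[field] = {"type": "number"}  # each configured field scores 0 or 1

outputs_schema = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "type": "object",
    "properties": properties,
    "required": ["aggregate_score"],
    "additionalProperties": False,
}
# properties now holds "aggregate_score", "user.name", and "user.age".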
1 change: 1 addition & 0 deletions api/oss/src/models/api/evaluation_model.py
@@ -20,6 +20,7 @@ class LegacyEvaluator(BaseModel):
oss: Optional[bool] = False
requires_llm_api_keys: Optional[bool] = False
tags: List[str]
archived: Optional[bool] = False
Member Author: this was forgotten; the archived flag had not previously been exposed on the LegacyEvaluator API model.

class EvaluatorConfig(BaseModel):
28 changes: 28 additions & 0 deletions api/oss/src/resources/evaluators/evaluators.py
@@ -332,6 +332,7 @@
"name": "JSON Field Match",
"key": "field_match_test",
"direct_use": False,
"archived": True, # Deprecated - use json_multi_field_match instead
Member Author: old JSON evaluators will continue working, and they will still be editable in the UI, but no user can create new ones.

Member Author: @jp-agenta let's not remove the old evaluators by mistake; a lot of users rely on them, and deprecating them completely is more pain than just keeping them indefinitely, imo.

"settings_template": {
"json_field": {
"label": "JSON Field",
@@ -355,6 +356,33 @@
"oss": True,
"tags": ["classifiers"],
},
{
"name": "JSON Multi-Field Match",
"key": "json_multi_field_match",
"direct_use": False,
"settings_template": {
"fields": {
"label": "Fields to Compare",
"type": "fields_tags_editor", # Custom type - tag-based add/remove editor
Member Author: this tells the UI how to render this setting in the playground.

"required": True,
"description": "Add fields to compare using dot notation for nested paths (e.g., user.name)",
},
"correct_answer_key": {
"label": "Expected Answer Column",
"default": "correct_answer",
"type": "string",
"required": True,
"description": "Column name containing the expected JSON object",
"ground_truth_key": True,
"advanced": True, # Hidden in advanced section
},
},
"description": "Compares configured fields in expected JSON against LLM output. Each field becomes a separate metric (0 or 1), with an aggregate_score showing the percentage of matching fields. Useful for entity extraction validation.",
"requires_testcase": "always",
"requires_trace": "always",
"oss": True,
"tags": ["classifiers"],
},
{
"name": "JSON Diff Match",
"key": "auto_json_diff",
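The json_multi_field_match description above implies a simple per-field scoring rule. Below is a minimal, self-contained sketch of that logic, assuming dot-notation paths into nested JSON; it is illustrative only and not the evaluator implementation shipped in this PR (function and variable names are invented):

from typing import Any, Dict, List


def resolve_path(path: str, obj: Any) -> Any:
    # Follow a dot-notation path (e.g. "user.name") into nested dicts.
    current = obj
    for part in path.split("."):
        if not isinstance(current, dict) or part not in current:
            return None
        current = current[part]
    return current


def multi_field_match(fields: List[str], expected: Dict[str, Any], output: Dict[str, Any]) -> Dict[str, float]:
    # Each configured field scores 1.0 on an exact match, else 0.0;
    # aggregate_score is the fraction of matching fields.
    scores: Dict[str, float] = {}
    for field in fields:
        scores[field] = 1.0 if resolve_path(field, expected) == resolve_path(field, output) else 0.0
    scores["aggregate_score"] = sum(scores.values()) / len(fields) if fields else 0.0
    return scores


# Example: two of three configured fields match, so aggregate_score is roughly 0.67.
print(multi_field_match(
    ["user.name", "user.age", "user.email"],
    {"user": {"name": "Ada", "age": 36, "email": "ada@example.com"}},
    {"user": {"name": "Ada", "age": 36, "email": "ada@other.com"}},
))

The keys returned by such a function line up with the outputs_schema reconstructed in service.py above: one numeric property per configured field plus a required aggregate_score.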