diff --git a/api/oss/src/core/evaluators/service.py b/api/oss/src/core/evaluators/service.py index b0a76728f..c8ef56939 100644 --- a/api/oss/src/core/evaluators/service.py +++ b/api/oss/src/core/evaluators/service.py @@ -1,13 +1,9 @@ from typing import Optional, List from uuid import UUID, uuid4 -from json import loads from oss.src.utils.helpers import get_slug_from_name_and_id from oss.src.services.db_manager import fetch_evaluator_config from oss.src.core.workflows.dtos import ( - WorkflowFlags, - WorkflowQueryFlags, - # WorkflowCreate, WorkflowEdit, WorkflowQuery, @@ -17,8 +13,6 @@ WorkflowVariantEdit, WorkflowVariantQuery, # - WorkflowRevisionData, - # WorkflowRevisionCreate, WorkflowRevisionEdit, WorkflowRevisionCommit, @@ -35,11 +29,7 @@ SimpleEvaluatorEdit, SimpleEvaluatorQuery, SimpleEvaluatorFlags, - SimpleEvaluatorQueryFlags, - # EvaluatorFlags, - EvaluatorQueryFlags, - # Evaluator, EvaluatorQuery, EvaluatorRevisionsLog, @@ -1435,11 +1425,33 @@ def _transfer_evaluator_revision_data( else None ) headers = None + # TODO: This function reconstructs output schemas from old evaluator settings. + # When fully migrating to the new workflow-based evaluator system, the output + # schema should be stored directly in the evaluator revision (workflow revision) + # at configuration time, rather than being inferred from settings here. + # For evaluators with dynamic outputs (auto_ai_critique, json_multi_field_match), + # the frontend/API should build and save the complete output schema when the + # user configures the evaluator. outputs_schema = None if str(old_evaluator.evaluator_key) == "auto_ai_critique": json_schema = old_evaluator.settings_values.get("json_schema", None) if json_schema and isinstance(json_schema, dict): outputs_schema = json_schema.get("schema", None) + # Handle json_multi_field_match with dynamic field-based properties + if str(old_evaluator.evaluator_key) == "json_multi_field_match": + # Build dynamic properties based on configured fields + fields = old_evaluator.settings_values.get("fields", []) + properties = {"aggregate_score": {"type": "number"}} + for field in fields: + # Each field becomes a numeric score (0 or 1) + properties[field] = {"type": "number"} + outputs_schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": properties, + "required": ["aggregate_score"], + "additionalProperties": False, + } if not outputs_schema: properties = ( {"score": {"type": "number"}, "success": {"type": "boolean"}} diff --git a/api/oss/src/models/api/evaluation_model.py b/api/oss/src/models/api/evaluation_model.py index 82e9f35cd..dc006e11d 100644 --- a/api/oss/src/models/api/evaluation_model.py +++ b/api/oss/src/models/api/evaluation_model.py @@ -20,6 +20,7 @@ class LegacyEvaluator(BaseModel): oss: Optional[bool] = False requires_llm_api_keys: Optional[bool] = False tags: List[str] + archived: Optional[bool] = False class EvaluatorConfig(BaseModel): diff --git a/api/oss/src/resources/evaluators/evaluators.py b/api/oss/src/resources/evaluators/evaluators.py index 13bf9617c..1e0eacdc8 100644 --- a/api/oss/src/resources/evaluators/evaluators.py +++ b/api/oss/src/resources/evaluators/evaluators.py @@ -332,6 +332,7 @@ "name": "JSON Field Match", "key": "field_match_test", "direct_use": False, + "archived": True, # Deprecated - use json_multi_field_match instead "settings_template": { "json_field": { "label": "JSON Field", @@ -355,6 +356,33 @@ "oss": True, "tags": ["classifiers"], }, + { + "name": "JSON Multi-Field Match", + "key": 
"json_multi_field_match", + "direct_use": False, + "settings_template": { + "fields": { + "label": "Fields to Compare", + "type": "fields_tags_editor", # Custom type - tag-based add/remove editor + "required": True, + "description": "Add fields to compare using dot notation for nested paths (e.g., user.name)", + }, + "correct_answer_key": { + "label": "Expected Answer Column", + "default": "correct_answer", + "type": "string", + "required": True, + "description": "Column name containing the expected JSON object", + "ground_truth_key": True, + "advanced": True, # Hidden in advanced section + }, + }, + "description": "Compares configured fields in expected JSON against LLM output. Each field becomes a separate metric (0 or 1), with an aggregate_score showing the percentage of matching fields. Useful for entity extraction validation.", + "requires_testcase": "always", + "requires_trace": "always", + "oss": True, + "tags": ["classifiers"], + }, { "name": "JSON Diff Match", "key": "auto_json_diff", diff --git a/api/oss/src/services/evaluators_service.py b/api/oss/src/services/evaluators_service.py index 2545a303c..bb93982be 100644 --- a/api/oss/src/services/evaluators_service.py +++ b/api/oss/src/services/evaluators_service.py @@ -1,34 +1,30 @@ -import re import json +import re import traceback -from typing import Any, Dict, Union, List, Optional +from typing import Any, Dict, List, Optional, Union -import litellm import httpx +import litellm +from agenta.sdk.managers.secrets import SecretsManager from fastapi import HTTPException from openai import AsyncOpenAI - -# COMMENTED OUT: autoevals dependency removed -# from autoevals.ragas import Faithfulness, ContextRelevancy - -from oss.src.utils.logging import get_module_logger -from oss.src.services.security import sandbox -from oss.src.models.shared_models import Error, Result from oss.src.models.api.evaluation_model import ( EvaluatorInputInterface, - EvaluatorOutputInterface, EvaluatorMappingInputInterface, EvaluatorMappingOutputInterface, + EvaluatorOutputInterface, ) +from oss.src.models.shared_models import Error, Result +from oss.src.services.security import sandbox + +# COMMENTED OUT: autoevals dependency removed +# from autoevals.ragas import Faithfulness, ContextRelevancy +from oss.src.utils.logging import get_module_logger from oss.src.utils.traces import ( - remove_trace_prefix, - process_distributed_trace_into_trace_tree, get_field_value_from_trace_tree, + process_distributed_trace_into_trace_tree, ) -from agenta.sdk.managers.secrets import SecretsManager - - log = get_module_logger(__name__) @@ -253,7 +249,7 @@ async def auto_exact_match( message=str(e), ), ) - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except return Result( type="error", value=None, @@ -352,6 +348,139 @@ async def field_match_test(input: EvaluatorInputInterface) -> EvaluatorOutputInt return {"outputs": {"success": result}} +def get_nested_value(obj: Any, path: str) -> Any: + """ + Get value from nested object using resolve_any() with graceful None on failure. 
+
+    Supports multiple path formats:
+    - Dot notation: "user.address.city", "items.0.name"
+    - JSON Path: "$.user.address.city", "$.items[0].name"
+    - JSON Pointer: "/user/address/city", "/items/0/name"
+
+    Args:
+        obj: The object to traverse (dict or nested structure)
+        path: Path expression in any supported format
+
+    Returns:
+        The value at the specified path, or None if path doesn't exist or resolution fails
+    """
+    if obj is None:
+        return None
+
+    try:
+        return resolve_any(path, obj)
+    except (KeyError, IndexError, ValueError, TypeError, ImportError):
+        return None
+
+
+async def auto_json_multi_field_match(
+    inputs: Dict[str, Any],  # pylint: disable=unused-argument
+    output: Union[str, Dict[str, Any]],
+    data_point: Dict[str, Any],
+    app_params: Dict[str, Any],  # pylint: disable=unused-argument
+    settings_values: Dict[str, Any],
+    lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
+) -> Result:
+    """
+    Evaluator that compares multiple configured fields in expected JSON against LLM output JSON.
+    Each configured field becomes a separate score in the output.
+
+    Returns a Result with:
+    - type="object" containing one score per configured field plus an aggregate score
+    - Each field score is 1.0 (match) or 0.0 (no match)
+    - 'aggregate_score' is the fraction of fields that matched (0.0-1.0)
+    """
+    try:
+        output = validate_string_output("json_multi_field_match", output)
+        correct_answer = get_correct_answer(data_point, settings_values)
+        eval_inputs = {"ground_truth": correct_answer, "prediction": output}
+        response = await json_multi_field_match(
+            input=EvaluatorInputInterface(
+                **{"inputs": eval_inputs, "settings": settings_values}
+            )
+        )
+        return Result(type="object", value=response["outputs"])
+    except ValueError as e:
+        return Result(
+            type="error",
+            value=None,
+            error=Error(
+                message=str(e),
+            ),
+        )
+    except Exception:
+        return Result(
+            type="error",
+            value=None,
+            error=Error(
+                message="Error during JSON Multi-Field Match evaluation",
+                stacktrace=str(traceback.format_exc()),
+            ),
+        )
+
+
+async def json_multi_field_match(
+    input: EvaluatorInputInterface,
+) -> EvaluatorOutputInterface:
+    """
+    Compare configured fields in expected JSON against LLM output JSON.
+    Each configured field becomes a separate score in the output.
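+
+    For example, settings {"fields": ["name", "email"]} with ground_truth
+    '{"name": "Ada", "email": "a@x.io"}' and prediction '{"name": "Ada", "email": "b@x.io"}'
+    (illustrative values) yield {"outputs": {"name": 1.0, "email": 0.0, "aggregate_score": 0.5}}.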
+ + Args: + input: EvaluatorInputInterface with: + - inputs.prediction: JSON string from LLM output + - inputs.ground_truth: JSON string from test data column + - settings.fields: List of field paths (strings) e.g., ["name", "email", "user.address.city"] + + Returns: + EvaluatorOutputInterface with one score per configured field plus overall score + """ + fields = input.settings.get("fields", []) + + if not fields: + raise ValueError("No fields configured for comparison") + + # Parse both JSON objects + prediction = input.inputs.get("prediction", "") + ground_truth = input.inputs.get("ground_truth", "") + + try: + if isinstance(ground_truth, str): + expected = json.loads(ground_truth) + else: + expected = ground_truth + except json.JSONDecodeError as e: + raise ValueError(f"Invalid JSON in ground truth: {str(e)}") + + try: + if isinstance(prediction, str): + actual = json.loads(prediction) + else: + actual = prediction + except json.JSONDecodeError as e: + raise ValueError(f"Invalid JSON in prediction: {str(e)}") + + results: Dict[str, Any] = {} + matches = 0 + + for field_path in fields: + # Support nested fields with dot notation + expected_val = get_nested_value(expected, field_path) + actual_val = get_nested_value(actual, field_path) + + # Exact match comparison (v1 - always exact) + match = expected_val == actual_val + + results[field_path] = 1.0 if match else 0.0 + if match: + matches += 1 + + # Aggregate score is the percentage of matching fields + results["aggregate_score"] = matches / len(fields) if fields else 0.0 + + return {"outputs": results} + + async def auto_webhook_test( inputs: Dict[str, Any], output: Union[str, Dict[str, Any]], @@ -435,7 +564,7 @@ async def auto_custom_code_run( ) ) return Result(type="number", value=response["outputs"]["score"]) - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except return Result( type="error", value=None, @@ -504,7 +633,7 @@ async def auto_ai_critique( ) ) return Result(type="number", value=response["outputs"]["score"]) - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except return Result( type="error", value=None, @@ -515,9 +644,7 @@ async def auto_ai_critique( ) -import json -import re -from typing import Any, Dict, Iterable, Tuple, Optional +from typing import Any, Dict, Iterable, Tuple try: import jsonpath # ✅ use module API @@ -1154,7 +1281,7 @@ async def auto_starts_with( ) ) return Result(type="bool", value=response["outputs"]["success"]) - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except return Result( type="error", value=None, @@ -1196,7 +1323,7 @@ async def auto_ends_with( ) result = Result(type="bool", value=response["outputs"]["success"]) return result - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except return Result( type="error", value=None, @@ -1238,7 +1365,7 @@ async def auto_contains( ) result = Result(type="bool", value=response["outputs"]["success"]) return result - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except return Result( type="error", value=None, @@ -1280,7 +1407,7 @@ async def auto_contains_any( ) result = Result(type="bool", value=response["outputs"]["success"]) return result - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except return Result( type="error", value=None, @@ -1323,7 +1450,7 @@ 
async def auto_contains_all( ) result = Result(type="bool", value=response["outputs"]["success"]) return result - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except return Result( type="error", value=None, @@ -1371,7 +1498,7 @@ async def auto_contains_json( input=EvaluatorInputInterface(**{"inputs": {"prediction": output}}) ) return Result(type="bool", value=response["outputs"]["success"]) - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except return Result( type="error", value=None, @@ -1389,7 +1516,7 @@ async def contains_json(input: EvaluatorInputInterface) -> EvaluatorOutputInterf potential_json = str(input.inputs["prediction"])[start_index:end_index] json.loads(potential_json) contains_json = True - except (ValueError, json.JSONDecodeError) as e: + except (ValueError, json.JSONDecodeError): contains_json = False return {"outputs": {"success": contains_json}} @@ -1852,7 +1979,7 @@ async def auto_levenshtein_distance( message=str(e), ), ) - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except return Result( type="error", value=None, @@ -1892,7 +2019,7 @@ async def auto_similarity_match( message=str(e), ), ) - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except return Result( type="error", value=None, @@ -2002,6 +2129,7 @@ async def auto_semantic_similarity( "auto_exact_match": auto_exact_match, "auto_regex_test": auto_regex_test, "field_match_test": auto_field_match_test, + "json_multi_field_match": auto_json_multi_field_match, "auto_webhook_test": auto_webhook_test, "auto_custom_code_run": auto_custom_code_run, "auto_ai_critique": auto_ai_critique, @@ -2024,6 +2152,7 @@ async def auto_semantic_similarity( "auto_exact_match": exact_match, "auto_regex_test": regex_test, "field_match_test": field_match_test, + "json_multi_field_match": json_multi_field_match, "auto_webhook_test": webhook_test, "auto_custom_code_run": custom_code_run, "auto_ai_critique": ai_critique, diff --git a/sdk/agenta/sdk/workflows/configurations.py b/sdk/agenta/sdk/workflows/configurations.py index 9086047c5..42310b936 100644 --- a/sdk/agenta/sdk/workflows/configurations.py +++ b/sdk/agenta/sdk/workflows/configurations.py @@ -5,6 +5,7 @@ auto_exact_match_v0_configuration = WorkflowServiceConfiguration() auto_regex_test_v0_configuration = WorkflowServiceConfiguration() field_match_test_v0_configuration = WorkflowServiceConfiguration() +json_multi_field_match_v0_configuration = WorkflowServiceConfiguration() auto_webhook_test_v0_configuration = WorkflowServiceConfiguration() auto_custom_code_run_v0_configuration = WorkflowServiceConfiguration() auto_ai_critique_v0_configuration = WorkflowServiceConfiguration() diff --git a/sdk/agenta/sdk/workflows/handlers.py b/sdk/agenta/sdk/workflows/handlers.py index b7b28080b..3ef2faf3a 100644 --- a/sdk/agenta/sdk/workflows/handlers.py +++ b/sdk/agenta/sdk/workflows/handlers.py @@ -1,14 +1,14 @@ -from typing import List, Any, Optional, Any, Dict, Union -from json import dumps, loads -import traceback import json -import re import math +import re +import traceback +from difflib import SequenceMatcher +from json import dumps, loads +from typing import Any, Dict, List, Optional, Union import httpx from pydantic import BaseModel, Field -from difflib import SequenceMatcher from agenta.sdk.utils.logging import get_module_logger from agenta.sdk.utils.lazy import ( @@ -21,33 +21,30 
@@ from agenta.sdk.litellm import mockllm from agenta.sdk.types import PromptTemplate, Message from agenta.sdk.managers.secrets import SecretsManager - from agenta.sdk.decorators.tracing import instrument - +from agenta.sdk.litellm.litellm import litellm_handler from agenta.sdk.models.shared import Data -from agenta.sdk.models.tracing import Trace -from agenta.sdk.workflows.sandbox import execute_code_safely from agenta.sdk.workflows.errors import ( + CustomCodeServerV0Error, InvalidConfigurationParametersV0Error, - MissingConfigurationParameterV0Error, InvalidConfigurationParameterV0Error, InvalidInputsV0Error, - MissingInputV0Error, InvalidInputV0Error, InvalidOutputsV0Error, - MissingOutputV0Error, InvalidSecretsV0Error, JSONDiffV0Error, LevenshteinDistanceV0Error, - SyntacticSimilarityV0Error, + MissingConfigurationParameterV0Error, + MissingInputV0Error, + PromptCompletionV0Error, + PromptFormattingV0Error, + RegexPatternV0Error, SemanticSimilarityV0Error, - WebhookServerV0Error, + SyntacticSimilarityV0Error, WebhookClientV0Error, - CustomCodeServerV0Error, - RegexPatternV0Error, - PromptFormattingV0Error, - PromptCompletionV0Error, + WebhookServerV0Error, ) +from agenta.sdk.workflows.sandbox import execute_code_safely log = get_module_logger(__name__) @@ -57,7 +54,6 @@ def _configure_litellm(): litellm = _load_litellm() if not litellm: raise ImportError("litellm is required for completion handling.") - from agenta.sdk.litellm.litellm import litellm_handler litellm.logging = False litellm.set_verbose = False @@ -85,9 +81,7 @@ def _compute_similarity(embedding_1: List[float], embedding_2: List[float]) -> f return dot / (norm1 * norm2) -import json -import re -from typing import Any, Dict, Iterable, Tuple, Optional +from typing import Any, Iterable, Tuple # ========= Scheme detection ========= @@ -392,7 +386,7 @@ def auto_exact_match_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "correct_answer_key" in parameters: + if "correct_answer_key" not in parameters: raise MissingConfigurationParameterV0Error(path="correct_answer_key") correct_answer_key = str(parameters["correct_answer_key"]) @@ -400,7 +394,7 @@ def auto_exact_match_v0( if inputs is None or not isinstance(inputs, dict): raise InvalidInputsV0Error(expected="dict", got=inputs) - if not correct_answer_key in inputs: + if correct_answer_key not in inputs: raise MissingInputV0Error(path=correct_answer_key) correct_answer = inputs[correct_answer_key] @@ -437,7 +431,7 @@ def auto_regex_test_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "regex_pattern" in parameters: + if "regex_pattern" not in parameters: raise MissingConfigurationParameterV0Error(path="regex_pattern") regex_pattern = parameters["regex_pattern"] @@ -495,12 +489,12 @@ def field_match_test_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "json_field" in parameters: + if "json_field" not in parameters: raise MissingConfigurationParameterV0Error(path="json_field") json_field = str(parameters["json_field"]) - if not "correct_answer_key" in parameters: + if "correct_answer_key" not in parameters: raise MissingConfigurationParameterV0Error(path="correct_answer_key") correct_answer_key = str(parameters["correct_answer_key"]) @@ -508,7 +502,7 @@ def field_match_test_v0( if 
inputs is None or not isinstance(inputs, dict): raise InvalidInputsV0Error(expected="dict", got=inputs) - if not correct_answer_key in inputs: + if correct_answer_key not in inputs: raise MissingInputV0Error(path=correct_answer_key) correct_answer = inputs[correct_answer_key] @@ -521,7 +515,7 @@ def field_match_test_v0( if isinstance(outputs, str): try: outputs_dict = loads(outputs) - except json.JSONDecodeError as e: + except json.JSONDecodeError: # raise InvalidOutputsV0Error(expected="dict", got=outputs) from e return {"success": False} @@ -529,7 +523,7 @@ def field_match_test_v0( # raise InvalidOutputsV0Error(expected=["dict", "str"], got=outputs) return {"success": False} - if not json_field in outputs_dict: + if json_field not in outputs_dict: # raise MissingOutputV0Error(path=json_field) return {"success": False} @@ -540,6 +534,148 @@ def field_match_test_v0( return {"success": success} +def _get_nested_value(obj: Any, path: str) -> Any: + """ + Get value from nested object using resolve_any() with graceful None on failure. + + Supports multiple path formats: + - Dot notation: "user.address.city", "items.0.name" + - JSON Path: "$.user.address.city", "$.items[0].name" + - JSON Pointer: "/user/address/city", "/items/0/name" + + Args: + obj: The object to traverse (dict or list) + path: Path expression in any supported format + + Returns: + The value at the path, or None if path doesn't exist or resolution fails + """ + if obj is None: + return None + + try: + return resolve_any(path, obj) + except (KeyError, IndexError, ValueError, TypeError, ImportError): + return None + + +@instrument(annotate=True) +def json_multi_field_match_v0( + parameters: Optional[Data] = None, + inputs: Optional[Data] = None, + outputs: Optional[Union[Data, str]] = None, +) -> Any: + """ + Multi-field JSON match evaluator for comparing multiple fields between expected and actual JSON. + + Each configured field becomes a separate score (0 or 1), and an aggregate_score shows + the percentage of matching fields. Useful for entity extraction validation. 
+ + Args: + inputs: Testcase data with ground truth JSON + outputs: Output from the workflow execution (expected to be JSON string or dict) + parameters: Configuration with: + - fields: List of field paths to compare (e.g., ["name", "user.address.city"]) + - correct_answer_key: Key in inputs containing the expected JSON + + Returns: + Dict with per-field scores and aggregate_score, e.g.: + {"name": 1.0, "email": 0.0, "aggregate_score": 0.5} + """ + if parameters is None or not isinstance(parameters, dict): + raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) + + if "fields" not in parameters: + raise MissingConfigurationParameterV0Error(path="fields") + + fields = parameters["fields"] + + if not isinstance(fields, list) or len(fields) == 0: + raise InvalidConfigurationParameterV0Error( + path="fields", + expected="non-empty list", + got=fields, + ) + + if "correct_answer_key" not in parameters: + raise MissingConfigurationParameterV0Error(path="correct_answer_key") + + correct_answer_key = str(parameters["correct_answer_key"]) + + if inputs is None or not isinstance(inputs, dict): + raise InvalidInputsV0Error(expected="dict", got=inputs) + + if correct_answer_key not in inputs: + raise MissingInputV0Error(path=correct_answer_key) + + correct_answer = inputs[correct_answer_key] + + # Parse ground truth JSON + if isinstance(correct_answer, str): + try: + expected = json.loads(correct_answer) + except json.JSONDecodeError: + raise InvalidInputV0Error( + path=correct_answer_key, + expected="valid JSON string", + got=correct_answer, + ) + elif isinstance(correct_answer, dict): + expected = correct_answer + else: + raise InvalidInputV0Error( + path=correct_answer_key, + expected=["dict", "str"], + got=correct_answer, + ) + + # Parse output JSON + if not isinstance(outputs, str) and not isinstance(outputs, dict): + # Return all zeros if output is invalid + results: Dict[str, Any] = {field: 0.0 for field in fields} + results["aggregate_score"] = 0.0 + return results + + if isinstance(outputs, str): + try: + actual = json.loads(outputs) + except json.JSONDecodeError: + # Return all zeros if output is not valid JSON + results = {field: 0.0 for field in fields} + results["aggregate_score"] = 0.0 + return results + else: + actual = outputs + + if not isinstance(actual, dict): + # Return all zeros if parsed output is not a dict + results = {field: 0.0 for field in fields} + results["aggregate_score"] = 0.0 + return results + + # -------------------------------------------------------------------------- + # Compare each configured field + results = {} + matches = 0 + + for field_path in fields: + expected_val = _get_nested_value(expected, field_path) + actual_val = _get_nested_value(actual, field_path) + + # Exact match comparison + match = expected_val == actual_val + + results[field_path] = 1.0 if match else 0.0 + if match: + matches += 1 + + # Aggregate score is the percentage of matching fields + results["aggregate_score"] = matches / len(fields) if fields else 0.0 + # -------------------------------------------------------------------------- + + return results + + @instrument(annotate=True) async def auto_webhook_test_v0( parameters: Optional[Data] = None, @@ -560,12 +696,12 @@ async def auto_webhook_test_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "webhook_url" in parameters: + if "webhook_url" not in parameters: raise 
MissingConfigurationParameterV0Error(path="webhook_url") webhook_url = str(parameters["webhook_url"]) - if not "correct_answer_key" in parameters: + if "correct_answer_key" not in parameters: raise MissingConfigurationParameterV0Error(path="correct_answer_key") correct_answer_key = str(parameters["correct_answer_key"]) @@ -573,7 +709,7 @@ async def auto_webhook_test_v0( if inputs is None or not isinstance(inputs, dict): raise InvalidInputsV0Error(expected="dict", got=inputs) - if not correct_answer_key in inputs: + if correct_answer_key not in inputs: raise MissingInputV0Error(path=correct_answer_key) correct_answer = inputs[correct_answer_key] @@ -665,12 +801,12 @@ async def auto_custom_code_run_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "code" in parameters: + if "code" not in parameters: raise MissingConfigurationParameterV0Error(path="code") code = str(parameters["code"]) - if not "correct_answer_key" in parameters: + if "correct_answer_key" not in parameters: raise MissingConfigurationParameterV0Error(path="correct_answer_key") correct_answer_key = str(parameters["correct_answer_key"]) @@ -678,7 +814,7 @@ async def auto_custom_code_run_v0( if inputs is None or not isinstance(inputs, dict): raise InvalidInputsV0Error(expected="dict", got=inputs) - if not correct_answer_key in inputs: + if correct_answer_key not in inputs: raise MissingInputV0Error(path=correct_answer_key) correct_answer = inputs[correct_answer_key] @@ -756,7 +892,7 @@ async def auto_ai_critique_v0( correct_answer_key = parameters.get("correct_answer_key") - if not "prompt_template" in parameters: + if "prompt_template" not in parameters: raise MissingConfigurationParameterV0Error(path="prompt_template") prompt_template = parameters.get("prompt_template") @@ -787,7 +923,7 @@ async def auto_ai_critique_v0( "json_schema" if template_version == "4" else "text" ) - if not response_type in ["text", "json_object", "json_schema"]: + if response_type not in ["text", "json_object", "json_schema"]: raise InvalidConfigurationParameterV0Error( path="response_type", expected=["text", "json_object", "json_schema"], @@ -992,7 +1128,7 @@ def auto_starts_with_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "prefix" in parameters: + if "prefix" not in parameters: raise MissingConfigurationParameterV0Error(path="prefix") prefix = parameters["prefix"] @@ -1041,7 +1177,7 @@ def auto_ends_with_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "suffix" in parameters: + if "suffix" not in parameters: raise MissingConfigurationParameterV0Error(path="suffix") suffix = parameters["suffix"] @@ -1090,7 +1226,7 @@ def auto_contains_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "substring" in parameters: + if "substring" not in parameters: raise MissingConfigurationParameterV0Error(path="substring") substring = parameters["substring"] @@ -1139,7 +1275,7 @@ def auto_contains_any_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "substrings" in parameters: + if "substrings" not in parameters: raise MissingConfigurationParameterV0Error(path="substrings") 
substrings = parameters["substrings"] @@ -1197,7 +1333,7 @@ def auto_contains_all_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "substrings" in parameters: + if "substrings" not in parameters: raise MissingConfigurationParameterV0Error(path="substrings") substrings = parameters["substrings"] @@ -1297,7 +1433,7 @@ def auto_json_diff_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "correct_answer_key" in parameters: + if "correct_answer_key" not in parameters: raise MissingConfigurationParameterV0Error(path="correct_answer_key") correct_answer_key = str(parameters["correct_answer_key"]) @@ -1305,7 +1441,7 @@ def auto_json_diff_v0( if inputs is None or not isinstance(inputs, dict): raise InvalidInputsV0Error(expected="dict", got=inputs) - if not correct_answer_key in inputs: + if correct_answer_key not in inputs: raise MissingInputV0Error(path=correct_answer_key) correct_answer = inputs[correct_answer_key] @@ -1389,7 +1525,7 @@ def auto_levenshtein_distance_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "correct_answer_key" in parameters: + if "correct_answer_key" not in parameters: raise MissingConfigurationParameterV0Error(path="correct_answer_key") correct_answer_key = str(parameters["correct_answer_key"]) @@ -1399,7 +1535,7 @@ def auto_levenshtein_distance_v0( if inputs is None or not isinstance(inputs, dict): raise InvalidInputsV0Error(expected="dict", got=inputs) - if not correct_answer_key in inputs: + if correct_answer_key not in inputs: raise MissingInputV0Error(path=correct_answer_key) correct_answer = inputs[correct_answer_key] @@ -1494,7 +1630,7 @@ def auto_similarity_match_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "correct_answer_key" in parameters: + if "correct_answer_key" not in parameters: raise MissingConfigurationParameterV0Error(path="correct_answer_key") correct_answer_key = str(parameters["correct_answer_key"]) @@ -1504,7 +1640,7 @@ def auto_similarity_match_v0( if inputs is None or not isinstance(inputs, dict): raise InvalidInputsV0Error(expected="dict", got=inputs) - if not correct_answer_key in inputs: + if correct_answer_key not in inputs: raise MissingInputV0Error(path=correct_answer_key) correct_answer = inputs[correct_answer_key] @@ -1587,7 +1723,7 @@ async def auto_semantic_similarity_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if not "correct_answer_key" in parameters: + if "correct_answer_key" not in parameters: raise MissingConfigurationParameterV0Error(path="correct_answer_key") correct_answer_key = str(parameters["correct_answer_key"]) @@ -1600,7 +1736,7 @@ async def auto_semantic_similarity_v0( if inputs is None or not isinstance(inputs, dict): raise InvalidInputsV0Error(expected="dict", got=inputs) - if not correct_answer_key in inputs: + if correct_answer_key not in inputs: raise MissingInputV0Error(path=correct_answer_key) correct_answer = inputs[correct_answer_key] @@ -1703,7 +1839,7 @@ async def completion_v0( if parameters is None or not isinstance(parameters, dict): raise InvalidConfigurationParametersV0Error(expected="dict", got=parameters) - if 
not "prompt" in parameters: + if "prompt" not in parameters: raise MissingConfigurationParameterV0Error(path="prompt") params: Dict[str, Any] = {**(parameters or {})} diff --git a/sdk/agenta/sdk/workflows/interfaces.py b/sdk/agenta/sdk/workflows/interfaces.py index 85334ab6c..6c1e5edfb 100644 --- a/sdk/agenta/sdk/workflows/interfaces.py +++ b/sdk/agenta/sdk/workflows/interfaces.py @@ -169,6 +169,53 @@ ), ) +json_multi_field_match_v0_interface = WorkflowServiceInterface( + uri="agenta:built-in:json_multi_field_match:v0", + schemas=dict( # type: ignore + parameters={ + "type": "object", + "title": "JSON Multi-Field Match Parameters", + "description": "Settings for comparing multiple JSON fields against expected values from a ground truth column.", + "properties": { + "correct_answer_key": { + "type": "string", + "title": "Ground Truth Column", + "description": "Column in test data containing the JSON ground truth.", + "default": "correct_answer", + }, + "fields": { + "type": "array", + "title": "Fields to Compare", + "description": "List of JSON field paths (dot notation) to compare. Each field becomes a separate score.", + "items": {"type": "string"}, + "default": [], + }, + }, + "required": ["correct_answer_key", "fields"], + "additionalProperties": False, + }, + inputs={ + "type": "object", + "title": "JSON Multi-Field Match Inputs", + "description": "Testcase data including the JSON ground truth.", + }, + outputs={ + "type": "object", + "title": "JSON Multi-Field Match Outputs", + "description": "Per-field match scores and aggregate score. Each field produces a 0 or 1 output.", + "properties": { + "aggregate_score": { + "type": "number", + "title": "Aggregate Score", + "description": "Percentage of matched fields (0-1).", + }, + }, + "required": ["aggregate_score"], + "additionalProperties": True, # Allows dynamic field outputs + }, + ), +) + auto_webhook_test_v0_interface = WorkflowServiceInterface( uri="agenta:built-in:auto_webhook_test:v0", schemas=dict( # type: ignore diff --git a/sdk/agenta/sdk/workflows/utils.py b/sdk/agenta/sdk/workflows/utils.py index d86f499da..2ecd57d21 100644 --- a/sdk/agenta/sdk/workflows/utils.py +++ b/sdk/agenta/sdk/workflows/utils.py @@ -9,6 +9,7 @@ auto_exact_match_v0, auto_regex_test_v0, field_match_test_v0, + json_multi_field_match_v0, auto_webhook_test_v0, auto_custom_code_run_v0, auto_ai_critique_v0, @@ -31,6 +32,7 @@ auto_exact_match_v0_interface, auto_regex_test_v0_interface, field_match_test_v0_interface, + json_multi_field_match_v0_interface, auto_webhook_test_v0_interface, auto_custom_code_run_v0_interface, auto_ai_critique_v0_interface, @@ -54,6 +56,7 @@ auto_exact_match_v0_configuration, auto_regex_test_v0_configuration, field_match_test_v0_configuration, + json_multi_field_match_v0_configuration, auto_webhook_test_v0_configuration, auto_custom_code_run_v0_configuration, auto_ai_critique_v0_configuration, @@ -78,6 +81,7 @@ auto_exact_match=dict(v0=auto_exact_match_v0_interface), auto_regex_test=dict(v0=auto_regex_test_v0_interface), field_match_test=dict(v0=field_match_test_v0_interface), + json_multi_field_match=dict(v0=json_multi_field_match_v0_interface), auto_webhook_test=dict(v0=auto_webhook_test_v0_interface), auto_custom_code_run=dict(v0=auto_custom_code_run_v0_interface), auto_ai_critique=dict(v0=auto_ai_critique_v0_interface), @@ -104,6 +108,7 @@ auto_exact_match=dict(v0=auto_exact_match_v0_configuration), auto_regex_test=dict(v0=auto_regex_test_v0_configuration), field_match_test=dict(v0=field_match_test_v0_configuration), + 
json_multi_field_match=dict(v0=json_multi_field_match_v0_configuration), auto_webhook_test=dict(v0=auto_webhook_test_v0_configuration), auto_custom_code_run=dict(v0=auto_custom_code_run_v0_configuration), auto_ai_critique=dict(v0=auto_ai_critique_v0_configuration), @@ -160,6 +165,7 @@ auto_exact_match=dict(v0=auto_exact_match_v0), auto_regex_test=dict(v0=auto_regex_test_v0), field_match_test=dict(v0=field_match_test_v0), + json_multi_field_match=dict(v0=json_multi_field_match_v0), auto_webhook_test=dict(v0=auto_webhook_test_v0), auto_custom_code_run=dict(v0=auto_custom_code_run_v0), auto_ai_critique=dict(v0=auto_ai_critique_v0), diff --git a/web/oss/src/components/Evaluators/assets/evaluatorFiltering.ts b/web/oss/src/components/Evaluators/assets/evaluatorFiltering.ts index ec01f427c..ff617479c 100644 --- a/web/oss/src/components/Evaluators/assets/evaluatorFiltering.ts +++ b/web/oss/src/components/Evaluators/assets/evaluatorFiltering.ts @@ -20,6 +20,7 @@ export const ENABLED_EVALUATORS = [ "auto_semantic_similarity", "auto_regex_test", "field_match_test", + "json_multi_field_match", "auto_json_diff", "auto_ai_critique", "auto_custom_code_run", diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx index 3098026f1..85f09fd17 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx @@ -11,6 +11,7 @@ import {isValidRegex} from "@/oss/lib/helpers/validators" import {generatePaths} from "@/oss/lib/transformers" import {EvaluationSettingsTemplate, JSSTheme} from "@/oss/lib/Types" +import {FieldsTagsEditor} from "./FieldsTagsEditor" import {JSONSchemaEditor} from "./JSONSchema" import {Messages} from "./Messages" @@ -215,6 +216,8 @@ export const DynamicFormField: React.FC = ({ : JSON.stringify(savedValue ?? {}, null, 2) } /> + ) : type === "fields_tags_editor" ? ( + ) : null} )} diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/FieldsTagsEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/FieldsTagsEditor.tsx new file mode 100644 index 000000000..a96a07a37 --- /dev/null +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/FieldsTagsEditor.tsx @@ -0,0 +1,228 @@ +/** + * FieldsTagsEditor - Tag-based editor for JSON field paths + * + * This component provides an add/remove interface for managing JSON field paths. + * Users can: + * - Add fields manually using an input field (supports dot notation for nested paths) + * - Remove fields by clicking the X button on tags + * - Detect fields from the selected testcase using a dedicated button + * + * The component also displays a non-removable "overall" field representing + * the aggregate result across all fields. 
+ * + * Auto-detection behavior: + * - When a testcase is loaded and no fields are configured, fields are auto-detected + */ + +import {useCallback, useEffect, useMemo, useRef, useState} from "react" + +import {PlusOutlined, SearchOutlined} from "@ant-design/icons" +import {Button, Form, Input, Tag, Tooltip, Typography} from "antd" +import type {FormInstance} from "antd/es/form" +import {useAtomValue} from "jotai" + +import {extractJsonPaths, safeParseJson} from "@/oss/lib/helpers/extractJsonPaths" + +import {playgroundSelectedTestcaseAtom} from "./state/atoms" + +const {Text} = Typography + +interface FieldsTagsEditorProps { + value?: string[] + onChange?: (value: string[]) => void + form?: FormInstance + name?: string | string[] + correctAnswerKey?: string +} + +/** + * Tag-based editor for managing JSON field paths with add/remove functionality. + * Includes "Detect from testcase" feature to auto-populate fields. + */ +export const FieldsTagsEditor: React.FC = ({ + value = [], + onChange, + form, + correctAnswerKey = "correct_answer", +}) => { + const [inputValue, setInputValue] = useState("") + // Track if we've already auto-detected to avoid re-triggering + const hasAutoDetectedRef = useRef(false) + + // Read the selected testcase from the playground atom + const testcaseSelection = useAtomValue(playgroundSelectedTestcaseAtom) + const testcase = testcaseSelection?.testcase + + // Watch the correct_answer_key from form to react to changes + // Using Form.useWatch instead of form.getFieldValue for reactivity + const formCorrectAnswerKey = Form.useWatch(["settings_values", "correct_answer_key"], form) + const effectiveKey = formCorrectAnswerKey || correctAnswerKey + + // Check if we can detect fields from testcase + const canDetectFields = useMemo(() => { + if (!testcase) return false + const groundTruthValue = testcase[effectiveKey] + if (!groundTruthValue) return false + const parsed = safeParseJson(groundTruthValue) + return parsed !== null + }, [testcase, effectiveKey]) + + // Extract available fields from the testcase + const detectableFields = useMemo(() => { + if (!testcase) return [] + const groundTruthValue = testcase[effectiveKey] + if (!groundTruthValue) return [] + const parsed = safeParseJson(groundTruthValue) + if (!parsed) return [] + return extractJsonPaths(parsed) + }, [testcase, effectiveKey]) + + // Auto-detect fields when testcase is loaded and no fields are configured + useEffect(() => { + // Only auto-detect if: + // 1. We haven't already auto-detected + // 2. There are no user-defined fields + // 3. 
We can detect fields from the testcase + if (!hasAutoDetectedRef.current && value.length === 0 && detectableFields.length > 0) { + hasAutoDetectedRef.current = true + onChange?.(detectableFields) + } + }, [detectableFields, value.length, onChange]) + + // Handle adding a new field + const handleAddField = useCallback(() => { + const trimmed = inputValue.trim() + if (!trimmed) return + + // Don't add duplicates + if (value.includes(trimmed)) { + setInputValue("") + return + } + + // Don't allow reserved field names + if (trimmed === "aggregate_score") { + setInputValue("") + return + } + + onChange?.([...value, trimmed]) + setInputValue("") + }, [inputValue, value, onChange]) + + // Handle removing a field + const handleRemoveField = useCallback( + (fieldToRemove: string) => { + onChange?.(value.filter((f) => f !== fieldToRemove)) + }, + [value, onChange], + ) + + // Handle detecting fields from testcase (replaces existing fields) + const handleDetectFields = useCallback(() => { + if (detectableFields.length > 0) { + onChange?.(detectableFields) + } + }, [detectableFields, onChange]) + + // Handle Enter key in input + const handleInputKeyDown = useCallback( + (e: React.KeyboardEvent) => { + if (e.key === "Enter") { + e.preventDefault() + handleAddField() + } + }, + [handleAddField], + ) + + // Generate tooltip for disabled detect button + const detectButtonTooltip = useMemo(() => { + if (!testcase) { + return "Select a testcase first to detect fields" + } + if (!canDetectFields) { + return `No JSON object found in the "${effectiveKey}" column` + } + return `Detect ${detectableFields.length} field(s) from testcase (replaces current fields)` + }, [testcase, canDetectFields, effectiveKey, detectableFields.length]) + + return ( +
+ {/* Field Tags Display */} +
+ {/* Non-removable aggregate_score tag */} + + + aggregate_score + + + + {/* User-defined field tags */} + {value.map((field) => ( + handleRemoveField(field)} + className="flex items-center font-mono text-[13px] !m-0" + > + {field} + + ))} + + {/* Empty state message */} + {value.length === 0 && ( + + Add fields to compare or detect them from a testcase + + )} +
+ + {/* Add Field Input */} +
+ setInputValue(e.target.value)} + onKeyDown={handleInputKeyDown} + suffix={ + + + ? + + + } + /> + +
+ + {/* Actions Row */} +
+ + Each field creates a column with value 0 (no match) or 1 (match) + + + + + +
+
+    )
+}
+
+export default FieldsTagsEditor
diff --git a/web/oss/src/lib/Types.ts b/web/oss/src/lib/Types.ts
index 656a5fe08..cfcb4ee49 100644
--- a/web/oss/src/lib/Types.ts
+++ b/web/oss/src/lib/Types.ts
@@ -991,6 +991,7 @@ type ValueTypeOptions =
     | "messages"
     | "multiple_choice"
     | "llm_response_schema"
+    | "fields_tags_editor"
 
 export interface EvaluationSettingsTemplate {
     type: ValueTypeOptions
diff --git a/web/oss/src/lib/helpers/extractJsonPaths.ts b/web/oss/src/lib/helpers/extractJsonPaths.ts
new file mode 100644
index 000000000..62176319e
--- /dev/null
+++ b/web/oss/src/lib/helpers/extractJsonPaths.ts
@@ -0,0 +1,86 @@
+/**
+ * Utility functions for extracting JSON paths from objects.
+ * Used by the JSON Multi-Field Match evaluator to auto-detect fields from testcase data.
+ */
+
+/**
+ * Recursively extracts all leaf paths from a JSON object using dot notation.
+ *
+ * Example:
+ *   Input: {user: {name: "John", address: {city: "NYC"}}}
+ *   Output: ["user.name", "user.address.city"]
+ *
+ * @param obj - The object to extract paths from
+ * @param prefix - Current path prefix (used for recursion)
+ * @returns Array of dot-notation paths to all leaf values
+ */
+export const extractJsonPaths = (obj: unknown, prefix = ""): string[] => {
+    if (obj === null || obj === undefined) return []
+    if (typeof obj !== "object") return prefix ? [prefix] : []
+
+    // For arrays, we don't expand individual indices - just mark the path
+    // This keeps the UI manageable and matches common use cases
+    if (Array.isArray(obj)) {
+        return prefix ? [prefix] : []
+    }
+
+    const paths: string[] = []
+
+    for (const key of Object.keys(obj as Record<string, unknown>)) {
+        const newPrefix = prefix ? `${prefix}.${key}` : key
+        const value = (obj as Record<string, unknown>)[key]
+
+        if (value !== null && typeof value === "object" && !Array.isArray(value)) {
+            // Recurse into nested objects
+            paths.push(...extractJsonPaths(value, newPrefix))
+        } else {
+            // Leaf node (primitive, array, or null)
+            paths.push(newPrefix)
+        }
+    }
+
+    return paths
+}
+
+/**
+ * Parses a JSON string and extracts all paths.
+ * Returns empty array if parsing fails.
+ *
+ * @param jsonString - JSON string to parse and extract paths from
+ * @returns Array of dot-notation paths
+ */
+export const extractJsonPathsFromString = (jsonString: string): string[] => {
+    try {
+        const parsed = JSON.parse(jsonString)
+        return extractJsonPaths(parsed)
+    } catch {
+        return []
+    }
+}
+
+/**
+ * Safely parses a value that might be JSON string or already an object.
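+ *
+ * For example, both '{"name": "Ada"}' (a JSON string) and {name: "Ada"} (an object)
+ * yield {name: "Ada"}, while arrays, primitives, and invalid JSON yield null.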
+ *
+ * @param value - Value to parse (string or object)
+ * @returns Parsed object or null if invalid
+ */
+export const safeParseJson = (value: unknown): Record<string, unknown> | null => {
+    if (value === null || value === undefined) return null
+
+    if (typeof value === "object" && !Array.isArray(value)) {
+        return value as Record<string, unknown>
+    }
+
+    if (typeof value === "string") {
+        try {
+            const parsed = JSON.parse(value)
+            if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
+                return parsed
+            }
+        } catch {
+            return null
+        }
+    }
+
+    return null
+}
diff --git a/web/oss/src/services/evaluators/index.ts b/web/oss/src/services/evaluators/index.ts
index f8576b9bc..2a9bb15de 100644
--- a/web/oss/src/services/evaluators/index.ts
+++ b/web/oss/src/services/evaluators/index.ts
@@ -67,6 +67,7 @@ const evaluatorIconsMap = {
     auto_similarity_match: similarityImg,
     auto_regex_test: regexImg,
     field_match_test: exactMatchImg,
+    json_multi_field_match: bracketCurlyImg,
     auto_webhook_test: webhookImg,
     auto_ai_critique: aiImg,
     auto_custom_code_run: codeImg,
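The scoring behavior added above can be sketched end to end in a few lines. This is a minimal, self-contained approximation with illustrative data, covering dot-notation paths only; the shipped evaluator delegates nested lookups to resolve_any(), which also accepts JSON Path and JSON Pointer expressions:

    import json
    from typing import Any, Dict, List

    def resolve_dot_path(obj: Any, path: str) -> Any:
        # Walk "user.address.city"-style paths; return None when any segment is missing.
        current = obj
        for segment in path.split("."):
            if isinstance(current, dict) and segment in current:
                current = current[segment]
            elif isinstance(current, list) and segment.isdigit() and int(segment) < len(current):
                current = current[int(segment)]
            else:
                return None
        return current

    def multi_field_scores(ground_truth: str, prediction: str, fields: List[str]) -> Dict[str, float]:
        # One 0/1 score per configured field, plus aggregate_score = fraction of matching fields.
        expected = json.loads(ground_truth)
        actual = json.loads(prediction)
        scores = {
            field: 1.0 if resolve_dot_path(expected, field) == resolve_dot_path(actual, field) else 0.0
            for field in fields
        }
        scores["aggregate_score"] = sum(scores.values()) / len(fields) if fields else 0.0
        return scores

    print(multi_field_scores(
        '{"user": {"name": "Ada", "address": {"city": "NYC"}}, "age": 36}',
        '{"user": {"name": "Ada", "address": {"city": "SF"}}, "age": 36}',
        ["user.name", "user.address.city", "age"],
    ))
    # {'user.name': 1.0, 'user.address.city': 0.0, 'age': 1.0, 'aggregate_score': 0.6666666666666666}

Keeping each field as its own 0/1 metric makes individual extraction failures visible as separate columns in the evaluation results, while aggregate_score provides the single number used to compare runs.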