Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Validator for conversation-style query and response inputs.
"""

from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional
from typing_extensions import override
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
from ._validation_constants import MessageRole, ContentType
Expand All @@ -18,13 +18,34 @@ class ConversationValidator(ValidatorInterface):
"""

requires_query: bool = True
check_for_unsupported_tools: bool = False
error_target: ErrorTarget

# Tool names that the assistant-message check rejects when
# ``check_for_unsupported_tools`` is enabled on the validator.
UNSUPPORTED_TOOLS: List[str] = [
    "web_search_call",
    "code_interpreter_call",
    "azure_ai_search_call",
    "bing_grounding_call",
    "bing_custom_search_preview_call",
    "azure_fabric",
    "sharepoint_grounding",
    "browser_automation",
    "openapi_call",
]

def __init__(
    self,
    error_target: ErrorTarget,
    requires_query: bool = True,
    check_for_unsupported_tools: bool = False,
):
    """Initialize with error target, query requirement and unsupported-tool check.

    :param error_target: Target attached to every ``EvaluationException`` this validator builds.
    :param requires_query: Stored on the instance; the code that reads it is outside this block.
    :param check_for_unsupported_tools: When true, tool calls whose name appears in
        ``UNSUPPORTED_TOOLS`` are reported as errors. Defaults to ``False``.
    """
    self.requires_query = requires_query
    self.check_for_unsupported_tools = check_for_unsupported_tools
    self.error_target = error_target

def _validate_field_exists(self, item: Dict[str, Any], field_name: str, context: str) -> Optional[EvaluationException]:
"""Validate that a field exists in a dictionary."""
if field_name not in item:
return EvaluationException(
message=f"Each {context} must contain a '{field_name}' field.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.INVALID_VALUE,
target=self.error_target,
)
return None

def _validate_string_field(
self, item: Dict[str, Any], field_name: str, context: str
) -> Optional[EvaluationException]:
Expand All @@ -49,7 +70,7 @@ def _validate_string_field(
def _validate_list_field(
self, item: Dict[str, Any], field_name: str, context: str
) -> Optional[EvaluationException]:
"""Validate that a field exists and is a dictionary."""
"""Validate that a field exists and is a list."""
if field_name not in item:
return EvaluationException(
message=f"Each {context} must contain a '{field_name}' field.",
Expand Down Expand Up @@ -109,14 +130,19 @@ def _validate_text_content_item(self, content_item: Dict[str, Any], role: str) -

def _validate_tool_call_content_item(self, content_item: Dict[str, Any]) -> Optional[EvaluationException]:
"""Validate a tool_call content item."""
if "type" not in content_item or content_item["type"] != ContentType.TOOL_CALL:
valid_tool_call_content_types = [ContentType.TOOL_CALL, ContentType.FUNCTION_CALL, ContentType.OPENAPI_CALL, ContentType.MCP_APPROVAL_REQUEST]
valid_tool_call_content_types_as_strings = [t.value for t in valid_tool_call_content_types]
if "type" not in content_item or content_item["type"] not in valid_tool_call_content_types:
return EvaluationException(
message=f"The content item must be of type '{ContentType.TOOL_CALL.value}' in tool_call content item.",
message=f"The content item must be of type {valid_tool_call_content_types_as_strings} in tool_call content item.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.INVALID_VALUE,
target=self.error_target,
)

if content_item["type"] == ContentType.MCP_APPROVAL_REQUEST:
return None

error = self._validate_string_field(content_item, "name", "tool_call content items")
if error:
return error
Expand Down Expand Up @@ -156,13 +182,14 @@ def _validate_assistant_message(self, message: Dict[str, Any]) -> Optional[Evalu
"""Validate assistant message content."""
content = message["content"]

valid_assistant_content_types = [ContentType.TEXT, ContentType.OUTPUT_TEXT, ContentType.TOOL_CALL, ContentType.FUNCTION_CALL, ContentType.MCP_APPROVAL_REQUEST, ContentType.OPENAPI_CALL]
valid_assistant_content_types_as_strings = [t.value for t in valid_assistant_content_types]
if isinstance(content, list):
for content_item in content:
content_type = content_item["type"]
valid_assistant_content_types = [ContentType.TEXT, ContentType.OUTPUT_TEXT, ContentType.TOOL_CALL]
if content_type not in valid_assistant_content_types:
return EvaluationException(
message=f"Invalid content type '{content_type}' for message with role '{MessageRole.ASSISTANT.value}'. Must be one of {[t.value for t in valid_assistant_content_types]}.",
message=f"Invalid content type '{content_type}' for message with role '{MessageRole.ASSISTANT.value}'. Must be one of {valid_assistant_content_types_as_strings}.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.INVALID_VALUE,
target=self.error_target,
Expand All @@ -172,10 +199,22 @@ def _validate_assistant_message(self, message: Dict[str, Any]) -> Optional[Evalu
error = self._validate_text_content_item(content_item, MessageRole.ASSISTANT)
if error:
return error
else: # must be tool_call
elif content_type in [ContentType.TOOL_CALL, ContentType.FUNCTION_CALL, ContentType.OPENAPI_CALL]:
error = self._validate_tool_call_content_item(content_item)
if error:
return error

# Raise error in case of unsupported tools for evaluators that enabled check_for_unsupported_tools
if self.check_for_unsupported_tools:
if content_type == ContentType.TOOL_CALL or content_type == ContentType.OPENAPI_CALL:
name = "openapi_call" if content_type == ContentType.OPENAPI_CALL else content_item["name"].lower()
if name in self.UNSUPPORTED_TOOLS:
return EvaluationException(
message=f"{name} tool call is currently not supported for {self.error_target} evaluator.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.NOT_APPLICABLE,
target=self.error_target,
)
return None

def _validate_tool_message(self, message: Dict[str, Any]) -> Optional[EvaluationException]:
Expand All @@ -196,21 +235,30 @@ def _validate_tool_message(self, message: Dict[str, Any]) -> Optional[Evaluation
if error:
return error

valid_tool_content_types = [ContentType.TOOL_RESULT, ContentType.FUNCTION_CALL_OUTPUT, ContentType.MCP_APPROVAL_RESPONSE, ContentType.OPENAPI_CALL_OUTPUT]
valid_tool_content_types_as_strings = [t.value for t in valid_tool_content_types]
for content_item in content:
content_type = content_item["type"]
if content_type != ContentType.TOOL_RESULT:
if content_type not in valid_tool_content_types:
return EvaluationException(
message=f"Invalid content type '{content_type}' for message with role '{MessageRole.TOOL.value}'. Must be '{ContentType.TOOL_RESULT.value}'.",
message=f"Invalid content type '{content_type}' for message with role '{MessageRole.TOOL.value}'. Must be one of {valid_tool_content_types_as_strings}.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.INVALID_VALUE,
target=self.error_target,
)

error = self._validate_dict_field(
content_item, "tool_result", f"content items for role '{MessageRole.TOOL.value}'"
)
if error:
return error
if content_type in [ContentType.TOOL_RESULT, ContentType.OPENAPI_CALL_OUTPUT]:
error = self._validate_field_exists(
content_item, "tool_result", f"content items for role '{MessageRole.TOOL.value}'"
)
if error:
return error
elif content_type == ContentType.FUNCTION_CALL_OUTPUT:
error = self._validate_field_exists(
content_item, "function_call_output", f"content items for role '{MessageRole.TOOL.value}'"
)
if error:
return error

return None

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ class ToolCallsValidator(ToolDefinitionsValidator):

optional_tool_definitions = False

def __init__(
    self,
    error_target: ErrorTarget,
    requires_query: bool = True,
    optional_tool_definitions: bool = False,
    check_for_unsupported_tools: bool = False,
):
    """Initialize by forwarding every setting to the parent validator.

    :param error_target: Passed through to the parent initializer.
    :param requires_query: Passed through to the parent initializer.
    :param optional_tool_definitions: Passed through to the parent initializer;
        defaults to ``False`` for this class.
    :param check_for_unsupported_tools: Passed through to the parent initializer.
        Defaults to ``False``.
    """
    super().__init__(error_target, requires_query, optional_tool_definitions, check_for_unsupported_tools)

def _validate_tool_calls(self, tool_calls) -> Optional[EvaluationException]:
"""Validate tool calls input."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ class ToolDefinitionsValidator(ConversationValidator):

optional_tool_definitions: bool = True

def __init__(
    self,
    error_target: ErrorTarget,
    requires_query: bool = True,
    optional_tool_definitions: bool = True,
    check_for_unsupported_tools: bool = False,
):
    """Initialize the conversation validator and record the tool-definition setting.

    :param error_target: Passed through to the parent initializer.
    :param requires_query: Passed through to the parent initializer.
    :param optional_tool_definitions: Stored on the instance; the code that reads it
        is outside this block.
    :param check_for_unsupported_tools: Passed through to the parent initializer.
        Defaults to ``False``.
    """
    super().__init__(error_target, requires_query, check_for_unsupported_tools)
    self.optional_tool_definitions = optional_tool_definitions

def _validate_tool_definition(self, tool_definition) -> Optional[EvaluationException]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,9 @@ class ContentType(str, Enum):
OUTPUT_TEXT = "output_text"
TOOL_CALL = "tool_call"
TOOL_RESULT = "tool_result"
FUNCTION_CALL = "function_call"
FUNCTION_CALL_OUTPUT = "function_call_output"
MCP_APPROVAL_REQUEST = "mcp_approval_request"
MCP_APPROVAL_RESPONSE = "mcp_approval_response"
OPENAPI_CALL = "openapi_call"
OPENAPI_CALL_OUTPUT = "openapi_call_output"
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def __init__(self, model_config, *, threshold=3, credential=None, **kwargs):
self._validator = ConversationValidator(
error_target=ErrorTarget.GROUNDEDNESS_EVALUATOR,
requires_query=False,
check_for_unsupported_tools=True,
)

self._validator_with_query = ConversationValidator(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class _TaskNavigationEfficiencyEvaluator(EvaluatorBase):
{"role": "assistant", "content": [{"type": "tool_call", "tool_call_id": "call_3", "name": "call_tool_B", "arguments": {}}]},
{"role": "assistant", "content": [{"type": "tool_call", "tool_call_id": "call_4", "name": "response_synthesis", "arguments": {}}]}
],
ground_truth=["identify_tools_to_call", "call_tool_A", "call_tool_B", "response_synthesis"]
)

# Example 2: Using tool names with parameters (exact parameter matching required)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ def __init__(self, model_config, *, threshold=_DEFAULT_TOOL_CALL_ACCURACY_SCORE,
self.threshold = threshold

# Initialize input validator
self._validator = ToolCallsValidator(error_target=ErrorTarget.TOOL_CALL_ACCURACY_EVALUATOR)
self._validator = ToolCallsValidator(
error_target=ErrorTarget.TOOL_CALL_ACCURACY_EVALUATOR,
check_for_unsupported_tools=True,
)

super().__init__(
model_config=model_config,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def __init__(self, model_config, *, credential=None, **kwargs):
self._validator = ToolDefinitionsValidator(
error_target=ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR,
requires_query=False,
check_for_unsupported_tools=True,
)

super().__init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ def __init__(

# Initialize input validator
self._validator = ToolDefinitionsValidator(
error_target=ErrorTarget.TOOL_INPUT_ACCURACY_EVALUATOR, optional_tool_definitions=False
error_target=ErrorTarget.TOOL_INPUT_ACCURACY_EVALUATOR, optional_tool_definitions=False,
check_for_unsupported_tools=True,
)

super().__init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@ def __init__(
prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)

# Initialize input validator
self._validator = ToolDefinitionsValidator(error_target=ErrorTarget.TOOL_OUTPUT_UTILIZATION_EVALUATOR)
self._validator = ToolDefinitionsValidator(
error_target=ErrorTarget.TOOL_OUTPUT_UTILIZATION_EVALUATOR,
check_for_unsupported_tools=True,
)

super().__init__(
model_config=model_config,
Expand Down
Loading