Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 83 additions & 20 deletions bluebox/agents/abstract_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,24 +61,47 @@
get_workaround_for_error,
)
from bluebox.utils.data_utils import format_bytes
from bluebox.utils.llm_serialization import serialize_tool_result, strip_llm_excluded
from bluebox.utils.llm_utils import token_optimized as token_optimized_decorator
from bluebox.utils.logger import get_logger

logger = get_logger(name=__name__)


# Keep persisted tool previews small so iterative runs don't bloat context
PERSISTED_TOOL_PREVIEW_MAX_CHARS = 800


class ToolResultPersistMode(StrEnum):
"""
Policy controlling when a tool result is persisted to the workspace.

Persistence saves the full result as a raw artifact and returns a
compact preview to the LLM, keeping context usage in check for
large payloads.

Attributes:
NEVER: Never persist; the full result is returned inline.
ALWAYS: Always persist, regardless of size.
OVERFLOW: Persist only when the serialized result exceeds the
tool's ``max_characters`` threshold.
"""
NEVER = "never"
ALWAYS = "always"
OVERFLOW = "overflow"


# Keep persisted tool previews small so iterative runs don't blow context.
PERSISTED_TOOL_PREVIEW_MAX_CHARS = 800


class AgentExecutionMode(StrEnum):
"""Execution mode for agent loops."""
"""
Execution mode for agent loops.

Attributes:
CONVERSATIONAL: Interactive mode where the agent responds to user
messages one at a time via :meth:`process_new_message`.
AUTONOMOUS: Self-directed mode where the agent runs a tool-driven
loop until it calls a finalize tool or hits the iteration cap.
See :meth:`AbstractAgent.run_autonomous`.
"""
CONVERSATIONAL = "conversational"
AUTONOMOUS = "autonomous"

Expand Down Expand Up @@ -108,36 +131,75 @@ class variable. Orchestrator agents use these cards to discover subagent

@dataclass(frozen=True)
class _ToolMeta:
    """
    Metadata attached to a handler method by :func:`agent_tool`.

    Instances are stored on the decorated method as ``method._tool_meta``
    and collected at class-definition time by
    :meth:`AbstractAgent._collect_tools`.

    Attributes:
        name: Tool name registered with the LLM client (derived from the
            method name by stripping leading underscores).
        description: Human-readable description shown to the LLM.
        parameters: JSON Schema ``object`` describing accepted parameters.
        availability: Static boolean or a callable ``(self) -> bool``
            evaluated before each LLM call to gate tool registration.
        persist: Result-persistence policy. See :class:`ToolResultPersistMode`.
        max_characters: Character threshold used by
            :attr:`ToolResultPersistMode.OVERFLOW` to decide when to
            persist a result to the workspace.
        token_optimized: If ``True``, the tool result is encoded with
            the ``token_optimized`` decorator for reduced token usage.
    """
    name: str
    description: str
    parameters: dict[str, Any]
    availability: bool | Callable[..., bool]
    persist: ToolResultPersistMode = ToolResultPersistMode.NEVER
    max_characters: int = 10_000
    token_optimized: bool = False


def _serialize_tool_result(tool_result: Any) -> tuple[str, str]:
try:
return json.dumps(tool_result, ensure_ascii=False, default=str, indent=2), "json"
except (TypeError, ValueError):
return str(tool_result), "text"
def _normalize_file_scope(scope: str) -> str:
"""
Normalize and validate a file-tool scope string. Strips whitespace, lowercases,
and ensures the value is one of the accepted scope literals.

Args:
scope: Raw scope value from a tool call (e.g. ``"Workspace"``).

def _normalize_file_scope(scope: str) -> str:
"""Normalize and validate file tool scope."""
Returns:
The normalized scope (``"workspace"`` or ``"docs"``).

Raises:
ValueError: If *scope* is not a recognized value.
"""
normalized_scope = scope.strip().lower()
if normalized_scope not in {"workspace", "docs"}:
raise ValueError("scope must be 'workspace' or 'docs'")
return normalized_scope


def _parse_search_terms(query: str) -> list[str]:
"""Split query text into distinct terms for terms-mode search."""
"""
Split a query string into unique, order-preserving search terms.

Tokens are split on commas and whitespace. Empty tokens and
duplicates are discarded while preserving first-occurrence order.

Args:
query: Free-text search query (e.g. ``"foo, bar baz"``).

Returns:
Deduplicated list of non-empty terms in original order.
"""
seen: set[str] = set()
terms: list[str] = []
for token in re.split(r"[,\s]+", query):
for token in re.split(
pattern=r"[,\s]+",
string=query
):
term = token.strip()
if term and term not in seen:
seen.add(term)
Expand Down Expand Up @@ -551,7 +613,7 @@ def _maybe_persist_tool_result(
if persist_mode == ToolResultPersistMode.NEVER:
return tool_result

serialized, content_type = _serialize_tool_result(tool_result)
serialized, content_type = serialize_tool_result(tool_result)
char_count = len(serialized)

if persist_mode == ToolResultPersistMode.OVERFLOW and char_count <= tool_meta.max_characters:
Expand Down Expand Up @@ -1080,6 +1142,7 @@ def _execute_tool(self, tool_name: str, tool_arguments: dict[str, Any]) -> dict[
logger.debug("Executing tool %s with arguments: %s", tool_name, tool_arguments)
# handler is unbound (from cls, not self) so pass self explicitly
raw_result = handler(self, **validated_arguments)
raw_result = strip_llm_excluded(raw_result) # strip LLMExclude-annotated fields from any Pydantic models
result_for_llm = self._maybe_persist_tool_result(
tool_name=tool_name,
tool_meta=tool_meta,
Expand Down
1 change: 1 addition & 0 deletions bluebox/agents/specialists/interaction_specialist.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class InteractionSpecialist(AbstractAgent):
"structural context (forms, inputs, buttons, links)."
),
)

SYSTEM_PROMPT: str = dedent("""\
You are a UI interaction analyst specializing in understanding what users
did on web pages from recorded browser interaction events.
Expand Down
1 change: 1 addition & 0 deletions bluebox/agents/specialists/network_specialist.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class NetworkSpecialist(AbstractAgent):
"inspecting request/response data, and semantic search across captured traffic."
),
)

SYSTEM_PROMPT: str = dedent(f"""
You are a network traffic analyst specializing in captured browser network data.

Expand Down
167 changes: 167 additions & 0 deletions bluebox/utils/llm_serialization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
"""
bluebox/utils/llm_serialization.py

Utilities for controlling what data gets sent to LLMs from tool results.

The LLMExclude marker lets you annotate Pydantic model fields that should be
stripped before a tool result is serialized for the LLM — e.g. large blobs,
internal IDs, or raw data the model doesn't need.

Usage on models::

from typing import Annotated
from pydantic import BaseModel
from bluebox.utils.llm_serialization import LLMExclude

class NetworkTransaction(BaseModel):
url: str
method: str
response_body: Annotated[str, LLMExclude()] # stripped before LLM sees it

Tool handlers can return these models (or dicts containing them) directly —
the agent infrastructure calls strip_llm_excluded() automatically.
"""

from __future__ import annotations

import functools
import json
from enum import StrEnum
from typing import Any, NamedTuple

from pydantic import BaseModel


class SerializedContentType(StrEnum):
"""
Content type of a serialized tool result.

Attributes:
JSON: Successfully serialized as JSON.
TEXT: Fell back to ``str()`` representation.
"""
JSON = "json"
TEXT = "text"


class SerializedToolResult(NamedTuple):
    """
    Serialized form of a tool return value, ready for the LLM.

    Also unpacks as a plain two-tuple, so callers may write
    ``serialized, content_type = serialize_tool_result(...)``.

    Attributes:
        serialized: The encoded payload (JSON document or plain text).
        content_type: Which encoding was applied.
    """
    serialized: str
    content_type: SerializedContentType


class LLMExclude:
    """
    Annotation marker: strip this field before a tool result reaches the LLM.

    Attach via ``Annotated``::

        name: str                                  # included
        raw_blob: Annotated[bytes, LLMExclude()]   # excluded
    """


def serialize_tool_result(tool_result: Any) -> SerializedToolResult:
    """
    Encode a tool result as a string suitable for sending to the LLM.

    JSON encoding is attempted first, coercing non-serializable values
    with ``default=str``; if encoding still fails, the plain ``str()``
    representation is used as a fallback.

    Args:
        tool_result: The value returned by a tool handler (typically a dict).

    Returns:
        A :class:`SerializedToolResult` (also unpacks as a two-tuple).
    """
    try:
        encoded = json.dumps(tool_result, ensure_ascii=False, default=str, indent=2)
    except (TypeError, ValueError):
        return SerializedToolResult(
            serialized=str(tool_result),
            content_type=SerializedContentType.TEXT,
        )
    return SerializedToolResult(
        serialized=encoded,
        content_type=SerializedContentType.JSON,
    )


@functools.lru_cache(maxsize=256)
def _excluded_fields(model_cls: type[BaseModel]) -> frozenset[str]:
    """
    Collect the names of *model_cls* fields annotated with :class:`LLMExclude`.

    Each entry of ``model_cls.model_fields`` carries its extra ``Annotated``
    metadata in ``info.metadata``; any field whose metadata contains an
    ``LLMExclude`` instance is reported here.

    Results are cached per class via ``lru_cache`` — safe because Pydantic
    fixes field definitions at class-creation time.

    Args:
        model_cls: A Pydantic BaseModel subclass to inspect.

    Returns:
        Frozen set of field names to exclude from LLM serialization
        (empty frozenset when the model has no LLMExclude annotations).
    """
    excluded: set[str] = set()
    for field_name, field_info in model_cls.model_fields.items():
        if any(isinstance(marker, LLMExclude) for marker in field_info.metadata):
            excluded.add(field_name)
    return frozenset(excluded)


def strip_llm_excluded(obj: Any) -> Any:
    """
    Recursively remove LLMExclude-annotated fields from Pydantic models.

    Every ``BaseModel`` found anywhere in the object tree is converted to a
    plain dict with its LLMExclude-annotated fields dropped; ``@computed_field``
    properties are included alongside the regular fields. Dicts, lists and
    tuples are rebuilt with their elements processed recursively; any other
    value passes through unchanged (just an ``isinstance`` check).

    Args:
        obj: Any object — typically a tool handler's return value. Can be a
            BaseModel, dict, list, tuple, or primitive.

    Returns:
        A plain-dict / list / tuple / primitive copy with all LLMExclude
        fields removed from any BaseModel instances at any nesting depth.
    """
    if isinstance(obj, BaseModel):
        model_cls = type(obj)
        dropped = _excluded_fields(model_cls)
        filtered: dict[str, Any] = {}
        for field_name in model_cls.model_fields:
            if field_name not in dropped:
                filtered[field_name] = strip_llm_excluded(getattr(obj, field_name))
        # @computed_field properties live outside model_fields; add them too.
        for field_name in model_cls.model_computed_fields:
            if field_name not in dropped:
                filtered[field_name] = strip_llm_excluded(getattr(obj, field_name))
        return filtered
    if isinstance(obj, dict):
        return {key: strip_llm_excluded(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [strip_llm_excluded(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(strip_llm_excluded(item) for item in obj)
    return obj
Loading