Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions truffile/infer/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,12 @@ def get_tag_for_tool(tool: Tool) -> dict:
"stop_after_first": not allow_parallel,
},
}
req.cfg.response_format.format = ResponseFormat.STRUCTURAL_TAG
try:
fmt = ResponseFormat.STRUCTURAL_TAG
except AttributeError:
# older proto or server; fall back to prompt-only tool guidance.
return
req.cfg.response_format.format = fmt
req.cfg.response_format.schema = json.dumps(structural_tag, indent=0)


Expand All @@ -66,7 +71,6 @@ def get_tag_for_tool(tool: Tool) -> dict:
"content": {"type": "json_schema", "json_schema": tool.input_schema},
"end": end,
}

structural_tag = {
"type": "structural_tag",
"format": {
Expand All @@ -87,5 +91,10 @@ def get_tag_for_tool(tool: Tool) -> dict:
],
},
}
req.cfg.response_format.format = ResponseFormat.STRUCTURAL_TAG
try:
fmt = ResponseFormat.STRUCTURAL_TAG
except AttributeError:
# older proto or server; fall back to prompt-only tool guidance.
return
req.cfg.response_format.format = fmt
req.cfg.response_format.schema = json.dumps(structural_tag, indent=0)
24 changes: 21 additions & 3 deletions truffile/infer/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import threading
import time
import uuid
import os
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from typing import Any, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -219,6 +220,17 @@ def _usage_to_openai(usage: Any) -> Dict[str, int]:
}


def _set_structural_tag(req: IRequest, structural_tag: Dict[str, Any]) -> bool:
    """Apply a structural-tag response format to *req* when supported.

    Returns True once the format and serialized tag schema have been set on
    ``req.cfg.response_format``.  Returns False when the loaded proto does not
    define ``ResponseFormat.STRUCTURAL_TAG`` (older proto or server), signaling
    the caller to fall back to prompt-only constraints.
    """
    fmt = getattr(ResponseFormat, "STRUCTURAL_TAG", None)
    if fmt is None:
        # Older proto or server without structural-tag support.
        return False
    cfg_fmt = req.cfg.response_format
    cfg_fmt.format = fmt
    cfg_fmt.schema = json.dumps(structural_tag, indent=0)
    return True


class _StreamFilter:
def __init__(self, hide_cot: bool = False) -> None:
self._buffer = ""
Expand Down Expand Up @@ -370,8 +382,7 @@ def build_request(self, payload: Dict[str, Any]) -> Tuple[IRequest, Model, bool,
],
},
}
req.cfg.response_format.format = ResponseFormat.STRUCTURAL_TAG
req.cfg.response_format.schema = json.dumps(structural_tag, indent=0)
_set_structural_tag(req, structural_tag)
else:
req.cfg.response_format.format = ResponseFormat.JSON
req.cfg.response_format.schema = json.dumps(schema)
Expand Down Expand Up @@ -464,6 +475,7 @@ def do_POST(self) -> None:
try:
payload = self._read_body()
except Exception as e:
print(f"\tError reading request body: {e}")
self._send_json(400, {"error": {"message": str(e), "type": "invalid_request_error"}})
return

Expand All @@ -472,6 +484,7 @@ def do_POST(self) -> None:
try:
req, model, is_reasoner, _tools, stream = proxy.build_request(payload)
except Exception as e:
print(f"\tError building request: {e}")
self._send_json(400, {"error": {"message": str(e), "type": "invalid_request_error"}})
return

Expand Down Expand Up @@ -502,9 +515,13 @@ def do_POST(self) -> None:
raw_content = ""
last_finish = None
filter_state = _StreamFilter(hide_cot=is_reasoner)

log_output = os.getenv("TRUFFLE_PROXY_LOG_STREAM_OUTPUT", "0") == "1"
if log_output:
print("Streaming output:")
for ir in proxy.run_stream(req):
raw_content += ir.content
if log_output:
print(ir.content, end="", flush=True)
if ir.HasField("finish_reason") and ir.finish_reason != FinishReason.FINISH_UNSPECIFIED:
last_finish = ir.finish_reason
visible = filter_state.feed(ir.content)
Expand Down Expand Up @@ -554,6 +571,7 @@ def do_POST(self) -> None:
{
"id": f"call_{i+1}",
"type": "function",
"index": i,
"function": {"name": name, "arguments": args},
}
)
Expand Down