From 4f102e7bc2d4ae946542f8dd4738067b76f9c597 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Mon, 23 Feb 2026 13:00:16 +0200
Subject: [PATCH 1/3] fix: resolve false-positive rate limit and
 one-message-behind in chat sessions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Claude Code CLI v2.1.45+ emits a `rate_limit_event` message type that
the Python SDK v0.1.19 cannot parse, raising MessageParseError. Two bugs
resulted:

1. **False-positive rate limit**: check_rate_limit_error() matched
   "rate_limit" in the exception string "Unknown message type:
   rate_limit_event" via both an explicit type check and a regex fallback,
   triggering 15-19s backoff + query re-send on every session.

2. **One-message-behind**: The MessageParseError killed the
   receive_response() async generator, but the CLI subprocess was still
   alive with buffered response data. Catching the error and returning
   early meant the rest of that response was never consumed, so the next
   send_message() read the previous response first, leaving every reply
   one message behind.

Changes:

- chat_constants.py: check_rate_limit_error() now returns (False, None)
  for any MessageParseError, blocking both false-positive paths. Added
  safe_receive_response() helper that restarts receive_response() after a
  MessageParseError (tolerating up to 50 such errors per response before
  giving up) — the SDK's decoupled producer/consumer architecture (anyio
  memory channel) allows the new generator to continue reading remaining
  messages without data loss. Removed calculate_rate_limit_backoff
  re-export and MAX_CHAT_RATE_LIMIT_RETRIES constant.

- spec_chat_session.py, assistant_chat_session.py, expand_chat_session.py:
  Replaced retry-with-backoff loops with safe_receive_response() wrapper.
  Removed asyncio.sleep backoff, query re-send, and rate_limited yield.
  Cleaned up unused imports (asyncio, calculate_rate_limit_backoff,
  MAX_CHAT_RATE_LIMIT_RETRIES).

- agent.py: Added inner retry loop around receive_response() with same
  MessageParseError skip-and-restart pattern. Removed early-return that
  truncated responses.

- types.ts: Removed SpecChatRateLimitedMessage,
  AssistantChatRateLimitedMessage, and their union entries.

- useSpecChat.ts, useAssistantChat.ts, useExpandChat.ts: Removed dead
  'rate_limited' case handlers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 agent.py                                  |  97 ++++----
 server/services/assistant_chat_session.py | 105 ++++-----
 server/services/chat_constants.py         |  60 +++--
 server/services/expand_chat_session.py    |  90 +++-----
 server/services/spec_chat_session.py      | 256 ++++++++++------------
 ui/src/hooks/useAssistantChat.ts          |  14 --
 ui/src/hooks/useExpandChat.ts             |  14 --
 ui/src/hooks/useSpecChat.ts               |  14 --
 ui/src/lib/types.ts                       |  17 --
 9 files changed, 281 insertions(+), 386 deletions(-)

diff --git a/agent.py b/agent.py
index b265a0a8..c2244390 100644
--- a/agent.py
+++ b/agent.py
@@ -74,46 +74,65 @@ async def run_agent_session(
         await client.query(message)
 
         # Collect response text and show tool use
+        # Retry receive_response() on MessageParseError — the SDK raises this for
+        # unknown CLI message types (e.g. "rate_limit_event") which kills the async
+        # generator.  The subprocess is still alive so we restart to read remaining
+        # messages from the buffered channel.
         response_text = ""
-        async for msg in client.receive_response():
-            msg_type = type(msg).__name__
-
-            # Handle AssistantMessage (text and tool use)
-            if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                for block in msg.content:
-                    block_type = type(block).__name__
-
-                    if block_type == "TextBlock" and hasattr(block, "text"):
-                        response_text += block.text
-                        print(block.text, end="", flush=True)
-                    elif block_type == "ToolUseBlock" and hasattr(block, "name"):
-                        print(f"\n[Tool: {block.name}]", flush=True)
-                        if hasattr(block, "input"):
-                            input_str = str(block.input)
-                            if len(input_str) > 200:
-                                print(f"   Input: {input_str[:200]}...", flush=True)
-                            else:
-                                print(f"   Input: {input_str}", flush=True)
-
-            # Handle UserMessage (tool results)
-            elif msg_type == "UserMessage" and hasattr(msg, "content"):
-                for block in msg.content:
-                    block_type = type(block).__name__
-
-                    if block_type == "ToolResultBlock":
-                        result_content = getattr(block, "content", "")
-                        is_error = getattr(block, "is_error", False)
-
-                        # Check if command was blocked by security hook
-                        if "blocked" in str(result_content).lower():
-                            print(f"   [BLOCKED] {result_content}", flush=True)
-                        elif is_error:
-                            # Show errors (truncated)
-                            error_str = str(result_content)[:500]
-                            print(f"   [Error] {error_str}", flush=True)
-                        else:
-                            # Tool succeeded - just show brief confirmation
-                            print("   [Done]", flush=True)
+        max_parse_retries = 50
+        parse_retries = 0
+        while True:
+            try:
+                async for msg in client.receive_response():
+                    msg_type = type(msg).__name__
+
+                    # Handle AssistantMessage (text and tool use)
+                    if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                        for block in msg.content:
+                            block_type = type(block).__name__
+
+                            if block_type == "TextBlock" and hasattr(block, "text"):
+                                response_text += block.text
+                                print(block.text, end="", flush=True)
+                            elif block_type == "ToolUseBlock" and hasattr(block, "name"):
+                                print(f"\n[Tool: {block.name}]", flush=True)
+                                if hasattr(block, "input"):
+                                    input_str = str(block.input)
+                                    if len(input_str) > 200:
+                                        print(f"   Input: {input_str[:200]}...", flush=True)
+                                    else:
+                                        print(f"   Input: {input_str}", flush=True)
+
+                    # Handle UserMessage (tool results)
+                    elif msg_type == "UserMessage" and hasattr(msg, "content"):
+                        for block in msg.content:
+                            block_type = type(block).__name__
+
+                            if block_type == "ToolResultBlock":
+                                result_content = getattr(block, "content", "")
+                                is_error = getattr(block, "is_error", False)
+
+                                # Check if command was blocked by security hook
+                                if "blocked" in str(result_content).lower():
+                                    print(f"   [BLOCKED] {result_content}", flush=True)
+                                elif is_error:
+                                    # Show errors (truncated)
+                                    error_str = str(result_content)[:500]
+                                    print(f"   [Error] {error_str}", flush=True)
+                                else:
+                                    # Tool succeeded - just show brief confirmation
+                                    print("   [Done]", flush=True)
+
+                break  # Normal completion
+            except Exception as inner_exc:
+                if type(inner_exc).__name__ == "MessageParseError":
+                    parse_retries += 1
+                    if parse_retries > max_parse_retries:
+                        print(f"Too many unrecognized CLI messages ({parse_retries}), stopping")
+                        break
+                    print(f"Ignoring unrecognized message from Claude CLI: {inner_exc}")
+                    continue
+                raise  # Re-raise to outer except
 
         print("\n" + "-" * 70 + "\n")
         return "continue", response_text
diff --git a/server/services/assistant_chat_session.py b/server/services/assistant_chat_session.py
index f666b1be..cff1f6ce 100755
--- a/server/services/assistant_chat_session.py
+++ b/server/services/assistant_chat_session.py
@@ -7,7 +7,6 @@
 but cannot modify any files.
 """
 
-import asyncio
 import json
 import logging
 import os
@@ -27,10 +26,9 @@
     get_messages,
 )
 from .chat_constants import (
-    MAX_CHAT_RATE_LIMIT_RETRIES,
     ROOT_DIR,
-    calculate_rate_limit_backoff,
     check_rate_limit_error,
+    safe_receive_response,
 )
 
 # Load environment variables from .env file if present
@@ -399,66 +397,47 @@ async def _query_claude(self, message: str) -> AsyncGenerator[dict, None]:
 
         full_response = ""
 
-        # Stream the response (with rate-limit retry)
-        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
-            try:
-                async for msg in self.client.receive_response():
-                    msg_type = type(msg).__name__
-
-                    if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                        for block in msg.content:
-                            block_type = type(block).__name__
-
-                            if block_type == "TextBlock" and hasattr(block, "text"):
-                                text = block.text
-                                if text:
-                                    full_response += text
-                                    yield {"type": "text", "content": text}
-
-                            elif block_type == "ToolUseBlock" and hasattr(block, "name"):
-                                tool_name = block.name
-                                tool_input = getattr(block, "input", {})
-
-                                # Intercept ask_user tool calls -> yield as question message
-                                if tool_name == "mcp__features__ask_user":
-                                    questions = tool_input.get("questions", [])
-                                    if questions:
-                                        yield {
-                                            "type": "question",
-                                            "questions": questions,
-                                        }
-                                        continue
-
-                                yield {
-                                    "type": "tool_call",
-                                    "tool": tool_name,
-                                    "input": tool_input,
-                                }
-                # Completed successfully — break out of retry loop
-                break
-            except Exception as exc:
-                is_rate_limit, retry_secs = check_rate_limit_error(exc)
-                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
-                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
-                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
-                    yield {
-                        "type": "rate_limited",
-                        "retry_in": delay,
-                        "attempt": _attempt + 1,
-                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
-                    }
-                    await asyncio.sleep(delay)
-                    await self.client.query(message)
-                    continue
-                if is_rate_limit:
-                    logger.error("Rate limit retries exhausted for assistant chat")
-                    yield {"type": "error", "content": "Rate limited. Please try again later."}
-                    return
-                # Non-rate-limit MessageParseError: log and break (don't crash)
-                if type(exc).__name__ == "MessageParseError":
-                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
-                    break
-                raise
+        # Stream the response
+        try:
+            async for msg in safe_receive_response(self.client, logger):
+                msg_type = type(msg).__name__
+
+                if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                    for block in msg.content:
+                        block_type = type(block).__name__
+
+                        if block_type == "TextBlock" and hasattr(block, "text"):
+                            text = block.text
+                            if text:
+                                full_response += text
+                                yield {"type": "text", "content": text}
+
+                        elif block_type == "ToolUseBlock" and hasattr(block, "name"):
+                            tool_name = block.name
+                            tool_input = getattr(block, "input", {})
+
+                            # Intercept ask_user tool calls -> yield as question message
+                            if tool_name == "mcp__features__ask_user":
+                                questions = tool_input.get("questions", [])
+                                if questions:
+                                    yield {
+                                        "type": "question",
+                                        "questions": questions,
+                                    }
+                                    continue
+
+                            yield {
+                                "type": "tool_call",
+                                "tool": tool_name,
+                                "input": tool_input,
+                            }
+        except Exception as exc:
+            is_rate_limit, _ = check_rate_limit_error(exc)
+            if is_rate_limit:
+                logger.warning(f"Rate limited: {exc}")
+                yield {"type": "error", "content": "Rate limited. Please try again later."}
+                return
+            raise
 
         # Store the complete response in the database
         if full_response and self.conversation_id:
diff --git a/server/services/chat_constants.py b/server/services/chat_constants.py
index 4dfbcd6b..16a41fdd 100644
--- a/server/services/chat_constants.py
+++ b/server/services/chat_constants.py
@@ -12,7 +12,7 @@
 import logging
 import sys
 from pathlib import Path
-from typing import AsyncGenerator
+from typing import Any, AsyncGenerator
 
 # -------------------------------------------------------------------
 # Root directory of the autoforge project (repository root).
@@ -33,15 +33,10 @@
 # imports continue to work unchanged.
 # -------------------------------------------------------------------
 from env_constants import API_ENV_VARS  # noqa: E402, F401
-from rate_limit_utils import calculate_rate_limit_backoff, is_rate_limit_error, parse_retry_after  # noqa: E402, F401
+from rate_limit_utils import is_rate_limit_error, parse_retry_after  # noqa: E402, F401
 
 logger = logging.getLogger(__name__)
 
-# -------------------------------------------------------------------
-# Rate-limit handling for chat sessions
-# -------------------------------------------------------------------
-MAX_CHAT_RATE_LIMIT_RETRIES = 3
-
 
 def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
     """Inspect an exception and determine if it represents a rate-limit.
@@ -49,24 +44,15 @@ def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
     Returns ``(is_rate_limit, retry_seconds)``.  ``retry_seconds`` is the
     parsed Retry-After value when available, otherwise ``None`` (caller
     should use exponential backoff).
-
-    Handles:
-    - ``MessageParseError`` whose raw *data* dict has
-      ``type == "rate_limit_event"`` (Claude CLI sends this).
-    - Any exception whose string representation matches known rate-limit
-      patterns (via ``rate_limit_utils.is_rate_limit_error``).
     """
-    exc_str = str(exc)
-
-    # Check for MessageParseError with a rate_limit_event payload
-    cls_name = type(exc).__name__
-    if cls_name == "MessageParseError":
-        raw_data = getattr(exc, "data", None)
-        if isinstance(raw_data, dict) and raw_data.get("type") == "rate_limit_event":
-            retry = parse_retry_after(str(raw_data)) if raw_data else None
-            return True, retry
+    # MessageParseError = unknown CLI message type (e.g. "rate_limit_event").
+    # These are informational events, NOT actual rate limit errors.
+    # The word "rate_limit" in the type name would false-positive the regex.
+    if type(exc).__name__ == "MessageParseError":
+        return False, None
 
-    # Fallback: match error text against known rate-limit patterns
+    # For all other exceptions: match error text against known rate-limit patterns
+    exc_str = str(exc)
     if is_rate_limit_error(exc_str):
         retry = parse_retry_after(exc_str)
         return True, retry
@@ -74,6 +60,34 @@ def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
     return False, None
 
 
+async def safe_receive_response(client: Any, log: logging.Logger) -> AsyncGenerator:
+    """Wrap ``client.receive_response()`` to skip ``MessageParseError``.
+
+    The Claude Code CLI may emit message types (e.g. ``rate_limit_event``)
+    that the installed Python SDK does not recognise, causing
+    ``MessageParseError`` which kills the async generator.  The CLI
+    subprocess is still alive and the SDK uses a buffered memory channel,
+    so we restart ``receive_response()`` to continue reading remaining
+    messages without losing data.
+    """
+    max_retries = 50
+    retries = 0
+    while True:
+        try:
+            async for msg in client.receive_response():
+                yield msg
+            return  # Normal completion
+        except Exception as exc:
+            if type(exc).__name__ == "MessageParseError":
+                retries += 1
+                if retries > max_retries:
+                    log.error(f"Too many unrecognized CLI messages ({retries}), stopping")
+                    return
+                log.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
+                continue
+            raise
+
+
 async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
     """Yield a single multimodal user message in Claude Agent SDK format.
 
diff --git a/server/services/expand_chat_session.py b/server/services/expand_chat_session.py
index e305d29b..35a2f5ca 100644
--- a/server/services/expand_chat_session.py
+++ b/server/services/expand_chat_session.py
@@ -23,11 +23,10 @@
 
 from ..schemas import ImageAttachment
 from .chat_constants import (
-    MAX_CHAT_RATE_LIMIT_RETRIES,
     ROOT_DIR,
-    calculate_rate_limit_backoff,
     check_rate_limit_error,
     make_multimodal_message,
+    safe_receive_response,
 )
 
 # Load environment variables from .env file if present
@@ -304,67 +303,32 @@ async def _query_claude(
         else:
             await self.client.query(message)
 
-        # Stream the response (with rate-limit retry)
-        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
-            try:
-                async for msg in self.client.receive_response():
-                    msg_type = type(msg).__name__
-
-                    if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                        for block in msg.content:
-                            block_type = type(block).__name__
-
-                            if block_type == "TextBlock" and hasattr(block, "text"):
-                                text = block.text
-                                if text:
-                                    yield {"type": "text", "content": text}
-
-                                    self.messages.append({
-                                        "role": "assistant",
-                                        "content": text,
-                                        "timestamp": datetime.now().isoformat()
-                                    })
-                # Completed successfully — break out of retry loop
-                break
-            except Exception as exc:
-                is_rate_limit, retry_secs = check_rate_limit_error(exc)
-                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
-                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
-                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
-                    yield {
-                        "type": "rate_limited",
-                        "retry_in": delay,
-                        "attempt": _attempt + 1,
-                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
-                    }
-                    await asyncio.sleep(delay)
-                    # Re-send the query before retrying receive_response
-                    if attachments and len(attachments) > 0:
-                        content_blocks_retry: list[dict[str, Any]] = []
-                        if message:
-                            content_blocks_retry.append({"type": "text", "text": message})
-                        for att in attachments:
-                            content_blocks_retry.append({
-                                "type": "image",
-                                "source": {
-                                    "type": "base64",
-                                    "media_type": att.mimeType,
-                                    "data": att.base64Data,
-                                }
-                            })
-                        await self.client.query(make_multimodal_message(content_blocks_retry))
-                    else:
-                        await self.client.query(message)
-                    continue
-                if is_rate_limit:
-                    logger.error("Rate limit retries exhausted for expand chat")
-                    yield {"type": "error", "content": "Rate limited. Please try again later."}
-                    return
-                # Non-rate-limit MessageParseError: log and break (don't crash)
-                if type(exc).__name__ == "MessageParseError":
-                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
-                    break
-                raise
+        # Stream the response
+        try:
+            async for msg in safe_receive_response(self.client, logger):
+                msg_type = type(msg).__name__
+
+                if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                    for block in msg.content:
+                        block_type = type(block).__name__
+
+                        if block_type == "TextBlock" and hasattr(block, "text"):
+                            text = block.text
+                            if text:
+                                yield {"type": "text", "content": text}
+
+                                self.messages.append({
+                                    "role": "assistant",
+                                    "content": text,
+                                    "timestamp": datetime.now().isoformat()
+                                })
+        except Exception as exc:
+            is_rate_limit, _ = check_rate_limit_error(exc)
+            if is_rate_limit:
+                logger.warning(f"Rate limited: {exc}")
+                yield {"type": "error", "content": "Rate limited. Please try again later."}
+                return
+            raise
 
     def get_features_created(self) -> int:
         """Get the total number of features created in this session."""
diff --git a/server/services/spec_chat_session.py b/server/services/spec_chat_session.py
index 8fdfecb9..a6b55986 100644
--- a/server/services/spec_chat_session.py
+++ b/server/services/spec_chat_session.py
@@ -6,7 +6,6 @@
 Uses the create-spec.md skill to guide users through app spec creation.
 """
 
-import asyncio
 import json
 import logging
 import os
@@ -21,11 +20,10 @@
 
 from ..schemas import ImageAttachment
 from .chat_constants import (
-    MAX_CHAT_RATE_LIMIT_RETRIES,
     ROOT_DIR,
-    calculate_rate_limit_backoff,
     check_rate_limit_error,
     make_multimodal_message,
+    safe_receive_response,
 )
 
 # Load environment variables from .env file if present
@@ -311,145 +309,125 @@ async def _query_claude(
         # Store paths for the completion message
         spec_path = None
 
-        # Stream the response using receive_response (with rate-limit retry)
-        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
-            try:
-                async for msg in self.client.receive_response():
-                    msg_type = type(msg).__name__
-
-                    if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                        # Process content blocks in the assistant message
-                        for block in msg.content:
-                            block_type = type(block).__name__
-
-                            if block_type == "TextBlock" and hasattr(block, "text"):
-                                # Accumulate text and yield it
-                                text = block.text
-                                if text:
-                                    current_text += text
-                                    yield {"type": "text", "content": text}
-
-                                    # Store in message history
-                                    self.messages.append({
-                                        "role": "assistant",
-                                        "content": text,
-                                        "timestamp": datetime.now().isoformat()
-                                    })
-
-                            elif block_type == "ToolUseBlock" and hasattr(block, "name"):
-                                tool_name = block.name
-                                tool_input = getattr(block, "input", {})
-                                tool_id = getattr(block, "id", "")
-
-                                if tool_name in ("Write", "Edit"):
-                                    # File being written or edited - track for verification
-                                    file_path = tool_input.get("file_path", "")
-
-                                    # Track app_spec.txt
-                                    if "app_spec.txt" in str(file_path):
-                                        pending_writes["app_spec"] = {
-                                            "tool_id": tool_id,
-                                            "path": file_path
-                                        }
-                                        logger.info(f"{tool_name} tool called for app_spec.txt: {file_path}")
-
-                                    # Track initializer_prompt.md
-                                    elif "initializer_prompt.md" in str(file_path):
-                                        pending_writes["initializer"] = {
-                                            "tool_id": tool_id,
-                                            "path": file_path
+        # Stream the response
+        try:
+            async for msg in safe_receive_response(self.client, logger):
+                msg_type = type(msg).__name__
+
+                if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                    # Process content blocks in the assistant message
+                    for block in msg.content:
+                        block_type = type(block).__name__
+
+                        if block_type == "TextBlock" and hasattr(block, "text"):
+                            # Accumulate text and yield it
+                            text = block.text
+                            if text:
+                                current_text += text
+                                yield {"type": "text", "content": text}
+
+                                # Store in message history
+                                self.messages.append({
+                                    "role": "assistant",
+                                    "content": text,
+                                    "timestamp": datetime.now().isoformat()
+                                })
+
+                        elif block_type == "ToolUseBlock" and hasattr(block, "name"):
+                            tool_name = block.name
+                            tool_input = getattr(block, "input", {})
+                            tool_id = getattr(block, "id", "")
+
+                            if tool_name in ("Write", "Edit"):
+                                # File being written or edited - track for verification
+                                file_path = tool_input.get("file_path", "")
+
+                                # Track app_spec.txt
+                                if "app_spec.txt" in str(file_path):
+                                    pending_writes["app_spec"] = {
+                                        "tool_id": tool_id,
+                                        "path": file_path
+                                    }
+                                    logger.info(f"{tool_name} tool called for app_spec.txt: {file_path}")
+
+                                # Track initializer_prompt.md
+                                elif "initializer_prompt.md" in str(file_path):
+                                    pending_writes["initializer"] = {
+                                        "tool_id": tool_id,
+                                        "path": file_path
+                                    }
+                                    logger.info(f"{tool_name} tool called for initializer_prompt.md: {file_path}")
+
+                elif msg_type == "UserMessage" and hasattr(msg, "content"):
+                    # Tool results - check for write confirmations and errors
+                    for block in msg.content:
+                        block_type = type(block).__name__
+                        if block_type == "ToolResultBlock":
+                            is_error = getattr(block, "is_error", False)
+                            tool_use_id = getattr(block, "tool_use_id", "")
+
+                            if is_error:
+                                content = getattr(block, "content", "Unknown error")
+                                logger.warning(f"Tool error: {content}")
+                                # Clear any pending writes that failed
+                                for key in pending_writes:
+                                    pending_write = pending_writes[key]
+                                    if pending_write is not None and tool_use_id == pending_write.get("tool_id"):
+                                        logger.error(f"{key} write failed: {content}")
+                                        pending_writes[key] = None
+                            else:
+                                # Tool succeeded - check which file was written
+
+                                # Check app_spec.txt
+                                if pending_writes["app_spec"] and tool_use_id == pending_writes["app_spec"].get("tool_id"):
+                                    file_path = pending_writes["app_spec"]["path"]
+                                    full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
+                                    if full_path.exists():
+                                        logger.info(f"app_spec.txt verified at: {full_path}")
+                                        files_written["app_spec"] = True
+                                        spec_path = file_path
+
+                                        # Notify about file write (but NOT completion yet)
+                                        yield {
+                                            "type": "file_written",
+                                            "path": str(file_path)
                                         }
-                                        logger.info(f"{tool_name} tool called for initializer_prompt.md: {file_path}")
-
-                    elif msg_type == "UserMessage" and hasattr(msg, "content"):
-                        # Tool results - check for write confirmations and errors
-                        for block in msg.content:
-                            block_type = type(block).__name__
-                            if block_type == "ToolResultBlock":
-                                is_error = getattr(block, "is_error", False)
-                                tool_use_id = getattr(block, "tool_use_id", "")
-
-                                if is_error:
-                                    content = getattr(block, "content", "Unknown error")
-                                    logger.warning(f"Tool error: {content}")
-                                    # Clear any pending writes that failed
-                                    for key in pending_writes:
-                                        pending_write = pending_writes[key]
-                                        if pending_write is not None and tool_use_id == pending_write.get("tool_id"):
-                                            logger.error(f"{key} write failed: {content}")
-                                            pending_writes[key] = None
-                                else:
-                                    # Tool succeeded - check which file was written
-
-                                    # Check app_spec.txt
-                                    if pending_writes["app_spec"] and tool_use_id == pending_writes["app_spec"].get("tool_id"):
-                                        file_path = pending_writes["app_spec"]["path"]
-                                        full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
-                                        if full_path.exists():
-                                            logger.info(f"app_spec.txt verified at: {full_path}")
-                                            files_written["app_spec"] = True
-                                            spec_path = file_path
-
-                                            # Notify about file write (but NOT completion yet)
-                                            yield {
-                                                "type": "file_written",
-                                                "path": str(file_path)
-                                            }
-                                        else:
-                                            logger.error(f"app_spec.txt not found after write: {full_path}")
-                                        pending_writes["app_spec"] = None
-
-                                    # Check initializer_prompt.md
-                                    if pending_writes["initializer"] and tool_use_id == pending_writes["initializer"].get("tool_id"):
-                                        file_path = pending_writes["initializer"]["path"]
-                                        full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
-                                        if full_path.exists():
-                                            logger.info(f"initializer_prompt.md verified at: {full_path}")
-                                            files_written["initializer"] = True
-
-                                            # Notify about file write
-                                            yield {
-                                                "type": "file_written",
-                                                "path": str(file_path)
-                                            }
-                                        else:
-                                            logger.error(f"initializer_prompt.md not found after write: {full_path}")
-                                        pending_writes["initializer"] = None
-
-                                    # Check if BOTH files are now written - only then signal completion
-                                    if files_written["app_spec"] and files_written["initializer"]:
-                                        logger.info("Both app_spec.txt and initializer_prompt.md verified - signaling completion")
-                                        self.complete = True
+                                    else:
+                                        logger.error(f"app_spec.txt not found after write: {full_path}")
+                                    pending_writes["app_spec"] = None
+
+                                # Check initializer_prompt.md
+                                if pending_writes["initializer"] and tool_use_id == pending_writes["initializer"].get("tool_id"):
+                                    file_path = pending_writes["initializer"]["path"]
+                                    full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
+                                    if full_path.exists():
+                                        logger.info(f"initializer_prompt.md verified at: {full_path}")
+                                        files_written["initializer"] = True
+
+                                        # Notify about file write
                                         yield {
-                                            "type": "spec_complete",
-                                            "path": str(spec_path)
+                                            "type": "file_written",
+                                            "path": str(file_path)
                                         }
-                # Completed successfully — break out of retry loop
-                break
-            except Exception as exc:
-                is_rate_limit, retry_secs = check_rate_limit_error(exc)
-                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
-                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
-                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
-                    yield {
-                        "type": "rate_limited",
-                        "retry_in": delay,
-                        "attempt": _attempt + 1,
-                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
-                    }
-                    await asyncio.sleep(delay)
-                    # Re-send the query before retrying receive_response
-                    await self.client.query(message)
-                    continue
-                if is_rate_limit:
-                    logger.error("Rate limit retries exhausted for spec chat")
-                    yield {"type": "error", "content": "Rate limited. Please try again later."}
-                    return
-                # Non-rate-limit MessageParseError: log and break (don't crash)
-                if type(exc).__name__ == "MessageParseError":
-                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
-                    break
-                raise
+                                    else:
+                                        logger.error(f"initializer_prompt.md not found after write: {full_path}")
+                                    pending_writes["initializer"] = None
+
+                                # Check if BOTH files are now written - only then signal completion
+                                if files_written["app_spec"] and files_written["initializer"]:
+                                    logger.info("Both app_spec.txt and initializer_prompt.md verified - signaling completion")
+                                    self.complete = True
+                                    yield {
+                                        "type": "spec_complete",
+                                        "path": str(spec_path)
+                                    }
+        except Exception as exc:
+            is_rate_limit, _ = check_rate_limit_error(exc)
+            if is_rate_limit:
+                logger.warning(f"Rate limited: {exc}")
+                yield {"type": "error", "content": "Rate limited. Please try again later."}
+                return
+            raise
 
     def is_complete(self) -> bool:
         """Check if spec creation is complete."""
diff --git a/ui/src/hooks/useAssistantChat.ts b/ui/src/hooks/useAssistantChat.ts
index 86e72f33..cb660f60 100755
--- a/ui/src/hooks/useAssistantChat.ts
+++ b/ui/src/hooks/useAssistantChat.ts
@@ -269,20 +269,6 @@ export function useAssistantChat({
             break;
           }
 
-          case "rate_limited": {
-            // Show rate limit info as system message
-            setMessages((prev) => [
-              ...prev,
-              {
-                id: generateId(),
-                role: "system",
-                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
-                timestamp: new Date(),
-              },
-            ]);
-            break;
-          }
-
           case "pong": {
             // Keep-alive response, nothing to do
             break;
diff --git a/ui/src/hooks/useExpandChat.ts b/ui/src/hooks/useExpandChat.ts
index ca00a68f..be632a54 100644
--- a/ui/src/hooks/useExpandChat.ts
+++ b/ui/src/hooks/useExpandChat.ts
@@ -226,20 +226,6 @@ export function useExpandChat({
             break
           }
 
-          case 'rate_limited': {
-            // Show rate limit info as system message
-            setMessages((prev) => [
-              ...prev,
-              {
-                id: generateId(),
-                role: 'system',
-                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
-                timestamp: new Date(),
-              },
-            ])
-            break
-          }
-
           case 'pong': {
             // Keep-alive response, nothing to do
             break
diff --git a/ui/src/hooks/useSpecChat.ts b/ui/src/hooks/useSpecChat.ts
index 6a3a0189..3bd09bb2 100644
--- a/ui/src/hooks/useSpecChat.ts
+++ b/ui/src/hooks/useSpecChat.ts
@@ -322,20 +322,6 @@ export function useSpecChat({
             break
           }
 
-          case 'rate_limited': {
-            // Show rate limit info as system message
-            setMessages((prev) => [
-              ...prev,
-              {
-                id: generateId(),
-                role: 'system',
-                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
-                timestamp: new Date(),
-              },
-            ])
-            break
-          }
-
           case 'pong': {
             // Keep-alive response, nothing to do
             break
diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts
index d4fed179..504888a4 100644
--- a/ui/src/lib/types.ts
+++ b/ui/src/lib/types.ts
@@ -407,13 +407,6 @@ export interface SpecChatResponseDoneMessage {
   type: 'response_done'
 }
 
-export interface SpecChatRateLimitedMessage {
-  type: 'rate_limited'
-  retry_in: number
-  attempt: number
-  max_attempts: number
-}
-
 export type SpecChatServerMessage =
   | SpecChatTextMessage
   | SpecChatQuestionMessage
@@ -423,7 +416,6 @@ export type SpecChatServerMessage =
   | SpecChatErrorMessage
   | SpecChatPongMessage
   | SpecChatResponseDoneMessage
-  | SpecChatRateLimitedMessage
 
 // Image attachment for chat messages
 export interface ImageAttachment {
@@ -509,13 +501,6 @@ export interface AssistantChatPongMessage {
   type: 'pong'
 }
 
-export interface AssistantChatRateLimitedMessage {
-  type: 'rate_limited'
-  retry_in: number
-  attempt: number
-  max_attempts: number
-}
-
 export type AssistantChatServerMessage =
   | AssistantChatTextMessage
   | AssistantChatToolCallMessage
@@ -524,7 +509,6 @@ export type AssistantChatServerMessage =
   | AssistantChatErrorMessage
   | AssistantChatConversationCreatedMessage
   | AssistantChatPongMessage
-  | AssistantChatRateLimitedMessage
 
 // ============================================================================
 // Expand Chat Types
@@ -548,7 +532,6 @@ export type ExpandChatServerMessage =
   | SpecChatErrorMessage       // Reuse error message type
   | SpecChatPongMessage        // Reuse pong message type
   | SpecChatResponseDoneMessage // Reuse response_done type
-  | SpecChatRateLimitedMessage // Reuse rate_limited message type
 
 // Bulk feature creation
 export interface FeatureBulkCreate {

From dceb535ade5755fa093bfff45078f4372aff9b90 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Mon, 23 Feb 2026 13:00:47 +0200
Subject: [PATCH 2/3] 0.1.15

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 269eecc0..15686f09 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "autoforge-ai",
-  "version": "0.1.14",
+  "version": "0.1.15",
   "description": "Autonomous coding agent with web UI - build complete apps with AI",
   "license": "AGPL-3.0",
   "bin": {

From afc2f4ac3c5a706379d9c61b463192f05ba3d703 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Mon, 23 Feb 2026 13:01:20 +0200
Subject: [PATCH 3/3] chore: sync ui/package-lock.json version to 0.1.15

---
 ui/package-lock.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ui/package-lock.json b/ui/package-lock.json
index 8d28e5e9..b8998431 100644
--- a/ui/package-lock.json
+++ b/ui/package-lock.json
@@ -56,7 +56,7 @@
     },
     "..": {
       "name": "autoforge-ai",
-      "version": "0.1.14",
+      "version": "0.1.15",
       "license": "AGPL-3.0",
       "bin": {
         "autoforge": "bin/autoforge.js"