diff --git a/.claude/commands/review-pr.md b/.claude/commands/review-pr.md
index b8b3023e..8a40d9bc 100644
--- a/.claude/commands/review-pr.md
+++ b/.claude/commands/review-pr.md
@@ -55,10 +55,10 @@ Pull request(s): $ARGUMENTS
    - Reviewing large, unfocused PRs is impractical and error-prone; the review cannot provide adequate assurance for such changes
 
 6. **Vision Alignment Check**
-   - Read the project's README.md and CLAUDE.md to understand the application's core purpose
-   - Assess whether this PR aligns with the application's intended functionality
-   - If the changes deviate significantly from the core vision or add functionality that doesn't serve the application's purpose, note this in the review
-   - This is not a blocker, but should be flagged for the reviewer's consideration
+   - **VISION.md protection**: First, check whether the PR diff modifies `VISION.md` in any way (edits, deletions, renames). If it does, **stop the review immediately** — verdict is **DON'T MERGE**. VISION.md is immutable and no PR is permitted to alter it. Explain this to the user and skip all remaining steps.
+   - Read the project's `VISION.md`, `README.md`, and `CLAUDE.md` to understand the application's core purpose and mandatory architectural constraints
+   - Assess whether this PR aligns with the vision defined in `VISION.md`
+   - **Vision deviation is a merge blocker.** If the PR introduces functionality, integrations, or architectural changes that conflict with `VISION.md`, the verdict must be **DON'T MERGE**. This is not negotiable — the vision document takes precedence over any PR rationale.
 
 7. **Safety Assessment**
    - Provide a review on whether the PR is safe to merge as-is
diff --git a/.claude/launch.json b/.claude/launch.json
new file mode 100644
index 00000000..728c68f8
--- /dev/null
+++ b/.claude/launch.json
@@ -0,0 +1,18 @@
+{
+  "version": "0.0.1",
+  "configurations": [
+    {
+      "name": "backend",
+      "runtimeExecutable": "python",
+      "runtimeArgs": ["-m", "uvicorn", "server.main:app", "--host", "127.0.0.1", "--port", "8888", "--reload"],
+      "port": 8888
+    },
+    {
+      "name": "frontend",
+      "runtimeExecutable": "cmd",
+      "runtimeArgs": ["/c", "cd ui && npx vite"],
+      "port": 5173
+    }
+  ],
+  "autoVerify": true
+}
diff --git a/VISION.md b/VISION.md
new file mode 100644
index 00000000..3ae6975d
--- /dev/null
+++ b/VISION.md
@@ -0,0 +1,22 @@
+# VISION
+
+This document defines the mandatory project vision for AutoForge. All contributions must align with these principles. PRs that deviate from this vision will be rejected. This file itself is immutable via PR — any PR that modifies VISION.md will be rejected outright.
+
+## Claude Agent SDK Exclusivity
+
+AutoForge is a wrapper around the **Claude Agent SDK**. This is a foundational architectural decision, not a preference.
+
+**What this means:**
+
+- AutoForge only supports providers, models, and integrations that work through the Claude Agent SDK.
+- We will not integrate with, accommodate, or add support for other AI SDKs, CLIs, or coding agent platforms (e.g., Codex, OpenCode, Aider, Continue, Cursor agents, or similar tools).
+
+**Why:**
+
+Each platform has its own approach to MCP tools, skills, context management, and feature integration. Attempting to support multiple agent frameworks creates an unsustainable maintenance burden and dilutes the quality of the core experience. By committing to the Claude Agent SDK exclusively, we can build deep, reliable integration rather than shallow compatibility across many targets.
+
+**In practice:**
+
+- PRs adding support for non-Claude agent frameworks will be rejected.
+- PRs introducing abstractions designed to make AutoForge "agent-agnostic" will be rejected.
+- Alternative API providers (e.g., Vertex AI, AWS Bedrock) are acceptable only when accessed through the Claude Agent SDK's own configuration.
diff --git a/package.json b/package.json
index 7bb52c08..269eecc0 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "autoforge-ai",
-  "version": "0.1.13",
+  "version": "0.1.14",
   "description": "Autonomous coding agent with web UI - build complete apps with AI",
   "license": "AGPL-3.0",
   "bin": {
diff --git a/requirements-prod.txt b/requirements-prod.txt
index 05e7f4cc..1b2f0b22 100644
--- a/requirements-prod.txt
+++ b/requirements-prod.txt
@@ -1,6 +1,6 @@
 # Production runtime dependencies only
 # For development, use requirements.txt (includes ruff, mypy, pytest)
-claude-agent-sdk>=0.1.0,<0.2.0
+claude-agent-sdk>=0.1.39,<0.2.0
 python-dotenv>=1.0.0
 sqlalchemy>=2.0.0
 fastapi>=0.115.0
diff --git a/requirements.txt b/requirements.txt
index 5d57a398..f042b4df 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-claude-agent-sdk>=0.1.0,<0.2.0
+claude-agent-sdk>=0.1.39,<0.2.0
 python-dotenv>=1.0.0
 sqlalchemy>=2.0.0
 fastapi>=0.115.0
diff --git a/server/services/assistant_chat_session.py b/server/services/assistant_chat_session.py
index f030aa4b..f666b1be 100755
--- a/server/services/assistant_chat_session.py
+++ b/server/services/assistant_chat_session.py
@@ -7,6 +7,7 @@
 but cannot modify any files.
 """
 
+import asyncio
 import json
 import logging
 import os
@@ -25,7 +26,12 @@
     create_conversation,
     get_messages,
 )
-from .chat_constants import ROOT_DIR
+from .chat_constants import (
+    MAX_CHAT_RATE_LIMIT_RETRIES,
+    ROOT_DIR,
+    calculate_rate_limit_backoff,
+    check_rate_limit_error,
+)
 
 # Load environment variables from .env file if present
 load_dotenv()
@@ -393,39 +399,66 @@ async def _query_claude(self, message: str) -> AsyncGenerator[dict, None]:
 
         full_response = ""
 
-        # Stream the response
-        async for msg in self.client.receive_response():
-            msg_type = type(msg).__name__
-
-            if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                for block in msg.content:
-                    block_type = type(block).__name__
-
-                    if block_type == "TextBlock" and hasattr(block, "text"):
-                        text = block.text
-                        if text:
-                            full_response += text
-                            yield {"type": "text", "content": text}
-
-                    elif block_type == "ToolUseBlock" and hasattr(block, "name"):
-                        tool_name = block.name
-                        tool_input = getattr(block, "input", {})
+        # Stream the response (with rate-limit retry)
+        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
+            try:
+                async for msg in self.client.receive_response():
+                    msg_type = type(msg).__name__
+
+                    if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                        for block in msg.content:
+                            block_type = type(block).__name__
+
+                            if block_type == "TextBlock" and hasattr(block, "text"):
+                                text = block.text
+                                if text:
+                                    full_response += text
+                                    yield {"type": "text", "content": text}
+
+                            elif block_type == "ToolUseBlock" and hasattr(block, "name"):
+                                tool_name = block.name
+                                tool_input = getattr(block, "input", {})
+
+                                # Intercept ask_user tool calls -> yield as question message
+                                if tool_name == "mcp__features__ask_user":
+                                    questions = tool_input.get("questions", [])
+                                    if questions:
+                                        yield {
+                                            "type": "question",
+                                            "questions": questions,
+                                        }
+                                        continue
 
-                        # Intercept ask_user tool calls -> yield as question message
-                        if tool_name == "mcp__features__ask_user":
-                            questions = tool_input.get("questions", [])
-                            if questions:
                                 yield {
-                                    "type": "question",
-                                    "questions": questions,
+                                    "type": "tool_call",
+                                    "tool": tool_name,
+                                    "input": tool_input,
                                 }
-                                continue
-
-                        yield {
-                            "type": "tool_call",
-                            "tool": tool_name,
-                            "input": tool_input,
-                        }
+                # Completed successfully — break out of retry loop
+                break
+            except Exception as exc:
+                is_rate_limit, retry_secs = check_rate_limit_error(exc)
+                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
+                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
+                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
+                    yield {
+                        "type": "rate_limited",
+                        "retry_in": delay,
+                        "attempt": _attempt + 1,
+                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
+                    }
+                    await asyncio.sleep(delay)
+                    await self.client.query(message)
+                    continue
+                if is_rate_limit:
+                    logger.error("Rate limit retries exhausted for assistant chat")
+                    yield {"type": "error", "content": "Rate limited. Please try again later."}
+                    return
+                # Non-rate-limit MessageParseError: log and break (don't crash)
+                if type(exc).__name__ == "MessageParseError":
+                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
+                    break
+                raise
 
         # Store the complete response in the database
         if full_response and self.conversation_id:
diff --git a/server/services/chat_constants.py b/server/services/chat_constants.py
index 0baddd2c..4dfbcd6b 100644
--- a/server/services/chat_constants.py
+++ b/server/services/chat_constants.py
@@ -9,6 +9,7 @@
 imports (``from .chat_constants import API_ENV_VARS``) continue to work.
 """
 
+import logging
 import sys
 from pathlib import Path
 from typing import AsyncGenerator
@@ -32,6 +33,45 @@
 # imports continue to work unchanged.
 # -------------------------------------------------------------------
 from env_constants import API_ENV_VARS  # noqa: E402, F401
+from rate_limit_utils import calculate_rate_limit_backoff, is_rate_limit_error, parse_retry_after  # noqa: E402, F401
+
+logger = logging.getLogger(__name__)
+
+# -------------------------------------------------------------------
+# Rate-limit handling for chat sessions
+# -------------------------------------------------------------------
+MAX_CHAT_RATE_LIMIT_RETRIES = 3
+
+
+def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
+    """Classify *exc* as a rate-limit error and extract any retry delay.
+
+    Returns ``(is_rate_limit, retry_seconds)``.  ``retry_seconds`` is the
+    result of ``parse_retry_after`` for a rate-limit error and is always
+    ``None`` otherwise (callers use exponential backoff when it is unset).
+
+    Recognised cases:
+    - ``MessageParseError`` (matched by class name, not ``isinstance``)
+      whose ``data`` attribute is a dict with
+      ``type == "rate_limit_event"`` (Claude CLI sends this).
+    - Any exception whose string representation matches known rate-limit
+      patterns (via ``rate_limit_utils.is_rate_limit_error``).
+    """
+    # Structured case: MessageParseError carrying a rate_limit_event payload.
+    if type(exc).__name__ == "MessageParseError":
+        raw_data = getattr(exc, "data", None)
+        if isinstance(raw_data, dict) and raw_data.get("type") == "rate_limit_event":
+            # A dict with a "type" key is never empty, so the payload's repr
+            # can be handed to the parser without an emptiness guard.
+            return True, parse_retry_after(str(raw_data))
+
+    # Fallback: match the error text against known rate-limit patterns.
+    # str(exc) is only needed on this path, so it is computed here.
+    exc_str = str(exc)
+    if is_rate_limit_error(exc_str):
+        return True, parse_retry_after(exc_str)
+
+    return False, None
 
 
 async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
diff --git a/server/services/expand_chat_session.py b/server/services/expand_chat_session.py
index b06e9d85..e305d29b 100644
--- a/server/services/expand_chat_session.py
+++ b/server/services/expand_chat_session.py
@@ -22,7 +22,13 @@
 from dotenv import load_dotenv
 
 from ..schemas import ImageAttachment
-from .chat_constants import ROOT_DIR, make_multimodal_message
+from .chat_constants import (
+    MAX_CHAT_RATE_LIMIT_RETRIES,
+    ROOT_DIR,
+    calculate_rate_limit_backoff,
+    check_rate_limit_error,
+    make_multimodal_message,
+)
 
 # Load environment variables from .env file if present
 load_dotenv()
@@ -298,24 +304,67 @@ async def _query_claude(
         else:
             await self.client.query(message)
 
-        # Stream the response
-        async for msg in self.client.receive_response():
-            msg_type = type(msg).__name__
-
-            if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                for block in msg.content:
-                    block_type = type(block).__name__
-
-                    if block_type == "TextBlock" and hasattr(block, "text"):
-                        text = block.text
-                        if text:
-                            yield {"type": "text", "content": text}
-
-                            self.messages.append({
-                                "role": "assistant",
-                                "content": text,
-                                "timestamp": datetime.now().isoformat()
+        # Stream the response (with rate-limit retry)
+        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
+            try:
+                async for msg in self.client.receive_response():
+                    msg_type = type(msg).__name__
+
+                    if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                        for block in msg.content:
+                            block_type = type(block).__name__
+
+                            if block_type == "TextBlock" and hasattr(block, "text"):
+                                text = block.text
+                                if text:
+                                    yield {"type": "text", "content": text}
+
+                                    self.messages.append({
+                                        "role": "assistant",
+                                        "content": text,
+                                        "timestamp": datetime.now().isoformat()
+                                    })
+                # Completed successfully — break out of retry loop
+                break
+            except Exception as exc:
+                is_rate_limit, retry_secs = check_rate_limit_error(exc)
+                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
+                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
+                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
+                    yield {
+                        "type": "rate_limited",
+                        "retry_in": delay,
+                        "attempt": _attempt + 1,
+                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
+                    }
+                    await asyncio.sleep(delay)
+                    # Re-send the query before retrying receive_response
+                    if attachments and len(attachments) > 0:
+                        content_blocks_retry: list[dict[str, Any]] = []
+                        if message:
+                            content_blocks_retry.append({"type": "text", "text": message})
+                        for att in attachments:
+                            content_blocks_retry.append({
+                                "type": "image",
+                                "source": {
+                                    "type": "base64",
+                                    "media_type": att.mimeType,
+                                    "data": att.base64Data,
+                                }
                             })
+                        await self.client.query(make_multimodal_message(content_blocks_retry))
+                    else:
+                        await self.client.query(message)
+                    continue
+                if is_rate_limit:
+                    logger.error("Rate limit retries exhausted for expand chat")
+                    yield {"type": "error", "content": "Rate limited. Please try again later."}
+                    return
+                # Non-rate-limit MessageParseError: log and break (don't crash)
+                if type(exc).__name__ == "MessageParseError":
+                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
+                    break
+                raise
 
     def get_features_created(self) -> int:
         """Get the total number of features created in this session."""
diff --git a/server/services/spec_chat_session.py b/server/services/spec_chat_session.py
index d3556173..8fdfecb9 100644
--- a/server/services/spec_chat_session.py
+++ b/server/services/spec_chat_session.py
@@ -6,6 +6,7 @@
 Uses the create-spec.md skill to guide users through app spec creation.
 """
 
+import asyncio
 import json
 import logging
 import os
@@ -19,7 +20,13 @@
 from dotenv import load_dotenv
 
 from ..schemas import ImageAttachment
-from .chat_constants import ROOT_DIR, make_multimodal_message
+from .chat_constants import (
+    MAX_CHAT_RATE_LIMIT_RETRIES,
+    ROOT_DIR,
+    calculate_rate_limit_backoff,
+    check_rate_limit_error,
+    make_multimodal_message,
+)
 
 # Load environment variables from .env file if present
 load_dotenv()
@@ -304,117 +311,145 @@ async def _query_claude(
         # Store paths for the completion message
         spec_path = None
 
-        # Stream the response using receive_response
-        async for msg in self.client.receive_response():
-            msg_type = type(msg).__name__
-
-            if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                # Process content blocks in the assistant message
-                for block in msg.content:
-                    block_type = type(block).__name__
-
-                    if block_type == "TextBlock" and hasattr(block, "text"):
-                        # Accumulate text and yield it
-                        text = block.text
-                        if text:
-                            current_text += text
-                            yield {"type": "text", "content": text}
-
-                            # Store in message history
-                            self.messages.append({
-                                "role": "assistant",
-                                "content": text,
-                                "timestamp": datetime.now().isoformat()
-                            })
-
-                    elif block_type == "ToolUseBlock" and hasattr(block, "name"):
-                        tool_name = block.name
-                        tool_input = getattr(block, "input", {})
-                        tool_id = getattr(block, "id", "")
-
-                        if tool_name in ("Write", "Edit"):
-                            # File being written or edited - track for verification
-                            file_path = tool_input.get("file_path", "")
-
-                            # Track app_spec.txt
-                            if "app_spec.txt" in str(file_path):
-                                pending_writes["app_spec"] = {
-                                    "tool_id": tool_id,
-                                    "path": file_path
-                                }
-                                logger.info(f"{tool_name} tool called for app_spec.txt: {file_path}")
-
-                            # Track initializer_prompt.md
-                            elif "initializer_prompt.md" in str(file_path):
-                                pending_writes["initializer"] = {
-                                    "tool_id": tool_id,
-                                    "path": file_path
-                                }
-                                logger.info(f"{tool_name} tool called for initializer_prompt.md: {file_path}")
-
-            elif msg_type == "UserMessage" and hasattr(msg, "content"):
-                # Tool results - check for write confirmations and errors
-                for block in msg.content:
-                    block_type = type(block).__name__
-                    if block_type == "ToolResultBlock":
-                        is_error = getattr(block, "is_error", False)
-                        tool_use_id = getattr(block, "tool_use_id", "")
-
-                        if is_error:
-                            content = getattr(block, "content", "Unknown error")
-                            logger.warning(f"Tool error: {content}")
-                            # Clear any pending writes that failed
-                            for key in pending_writes:
-                                pending_write = pending_writes[key]
-                                if pending_write is not None and tool_use_id == pending_write.get("tool_id"):
-                                    logger.error(f"{key} write failed: {content}")
-                                    pending_writes[key] = None
-                        else:
-                            # Tool succeeded - check which file was written
-
-                            # Check app_spec.txt
-                            if pending_writes["app_spec"] and tool_use_id == pending_writes["app_spec"].get("tool_id"):
-                                file_path = pending_writes["app_spec"]["path"]
-                                full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
-                                if full_path.exists():
-                                    logger.info(f"app_spec.txt verified at: {full_path}")
-                                    files_written["app_spec"] = True
-                                    spec_path = file_path
-
-                                    # Notify about file write (but NOT completion yet)
-                                    yield {
-                                        "type": "file_written",
-                                        "path": str(file_path)
-                                    }
-                                else:
-                                    logger.error(f"app_spec.txt not found after write: {full_path}")
-                                pending_writes["app_spec"] = None
-
-                            # Check initializer_prompt.md
-                            if pending_writes["initializer"] and tool_use_id == pending_writes["initializer"].get("tool_id"):
-                                file_path = pending_writes["initializer"]["path"]
-                                full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
-                                if full_path.exists():
-                                    logger.info(f"initializer_prompt.md verified at: {full_path}")
-                                    files_written["initializer"] = True
-
-                                    # Notify about file write
-                                    yield {
-                                        "type": "file_written",
-                                        "path": str(file_path)
-                                    }
+        # Stream the response using receive_response (with rate-limit retry)
+        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
+            try:
+                async for msg in self.client.receive_response():
+                    msg_type = type(msg).__name__
+
+                    if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                        # Process content blocks in the assistant message
+                        for block in msg.content:
+                            block_type = type(block).__name__
+
+                            if block_type == "TextBlock" and hasattr(block, "text"):
+                                # Accumulate text and yield it
+                                text = block.text
+                                if text:
+                                    current_text += text
+                                    yield {"type": "text", "content": text}
+
+                                    # Store in message history
+                                    self.messages.append({
+                                        "role": "assistant",
+                                        "content": text,
+                                        "timestamp": datetime.now().isoformat()
+                                    })
+
+                            elif block_type == "ToolUseBlock" and hasattr(block, "name"):
+                                tool_name = block.name
+                                tool_input = getattr(block, "input", {})
+                                tool_id = getattr(block, "id", "")
+
+                                if tool_name in ("Write", "Edit"):
+                                    # File being written or edited - track for verification
+                                    file_path = tool_input.get("file_path", "")
+
+                                    # Track app_spec.txt
+                                    if "app_spec.txt" in str(file_path):
+                                        pending_writes["app_spec"] = {
+                                            "tool_id": tool_id,
+                                            "path": file_path
+                                        }
+                                        logger.info(f"{tool_name} tool called for app_spec.txt: {file_path}")
+
+                                    # Track initializer_prompt.md
+                                    elif "initializer_prompt.md" in str(file_path):
+                                        pending_writes["initializer"] = {
+                                            "tool_id": tool_id,
+                                            "path": file_path
+                                        }
+                                        logger.info(f"{tool_name} tool called for initializer_prompt.md: {file_path}")
+
+                    elif msg_type == "UserMessage" and hasattr(msg, "content"):
+                        # Tool results - check for write confirmations and errors
+                        for block in msg.content:
+                            block_type = type(block).__name__
+                            if block_type == "ToolResultBlock":
+                                is_error = getattr(block, "is_error", False)
+                                tool_use_id = getattr(block, "tool_use_id", "")
+
+                                if is_error:
+                                    content = getattr(block, "content", "Unknown error")
+                                    logger.warning(f"Tool error: {content}")
+                                    # Clear any pending writes that failed
+                                    for key in pending_writes:
+                                        pending_write = pending_writes[key]
+                                        if pending_write is not None and tool_use_id == pending_write.get("tool_id"):
+                                            logger.error(f"{key} write failed: {content}")
+                                            pending_writes[key] = None
                                 else:
-                                    logger.error(f"initializer_prompt.md not found after write: {full_path}")
-                                pending_writes["initializer"] = None
-
-                            # Check if BOTH files are now written - only then signal completion
-                            if files_written["app_spec"] and files_written["initializer"]:
-                                logger.info("Both app_spec.txt and initializer_prompt.md verified - signaling completion")
-                                self.complete = True
-                                yield {
-                                    "type": "spec_complete",
-                                    "path": str(spec_path)
-                                }
+                                    # Tool succeeded - check which file was written
+
+                                    # Check app_spec.txt
+                                    if pending_writes["app_spec"] and tool_use_id == pending_writes["app_spec"].get("tool_id"):
+                                        file_path = pending_writes["app_spec"]["path"]
+                                        full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
+                                        if full_path.exists():
+                                            logger.info(f"app_spec.txt verified at: {full_path}")
+                                            files_written["app_spec"] = True
+                                            spec_path = file_path
+
+                                            # Notify about file write (but NOT completion yet)
+                                            yield {
+                                                "type": "file_written",
+                                                "path": str(file_path)
+                                            }
+                                        else:
+                                            logger.error(f"app_spec.txt not found after write: {full_path}")
+                                        pending_writes["app_spec"] = None
+
+                                    # Check initializer_prompt.md
+                                    if pending_writes["initializer"] and tool_use_id == pending_writes["initializer"].get("tool_id"):
+                                        file_path = pending_writes["initializer"]["path"]
+                                        full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
+                                        if full_path.exists():
+                                            logger.info(f"initializer_prompt.md verified at: {full_path}")
+                                            files_written["initializer"] = True
+
+                                            # Notify about file write
+                                            yield {
+                                                "type": "file_written",
+                                                "path": str(file_path)
+                                            }
+                                        else:
+                                            logger.error(f"initializer_prompt.md not found after write: {full_path}")
+                                        pending_writes["initializer"] = None
+
+                                    # Check if BOTH files are now written - only then signal completion
+                                    if files_written["app_spec"] and files_written["initializer"]:
+                                        logger.info("Both app_spec.txt and initializer_prompt.md verified - signaling completion")
+                                        self.complete = True
+                                        yield {
+                                            "type": "spec_complete",
+                                            "path": str(spec_path)
+                                        }
+                # Completed successfully — break out of retry loop
+                break
+            except Exception as exc:
+                is_rate_limit, retry_secs = check_rate_limit_error(exc)
+                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
+                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
+                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
+                    yield {
+                        "type": "rate_limited",
+                        "retry_in": delay,
+                        "attempt": _attempt + 1,
+                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
+                    }
+                    await asyncio.sleep(delay)
+                    # Re-send the query before retrying receive_response
+                    await self.client.query(message)
+                    continue
+                if is_rate_limit:
+                    logger.error("Rate limit retries exhausted for spec chat")
+                    yield {"type": "error", "content": "Rate limited. Please try again later."}
+                    return
+                # Non-rate-limit MessageParseError: log and break (don't crash)
+                if type(exc).__name__ == "MessageParseError":
+                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
+                    break
+                raise
 
     def is_complete(self) -> bool:
         """Check if spec creation is complete."""
diff --git a/ui/package-lock.json b/ui/package-lock.json
index 8624a5ed..8d28e5e9 100644
--- a/ui/package-lock.json
+++ b/ui/package-lock.json
@@ -56,7 +56,7 @@
     },
     "..": {
       "name": "autoforge-ai",
-      "version": "0.1.13",
+      "version": "0.1.14",
       "license": "AGPL-3.0",
       "bin": {
         "autoforge": "bin/autoforge.js"
diff --git a/ui/src/hooks/useAssistantChat.ts b/ui/src/hooks/useAssistantChat.ts
index cb660f60..86e72f33 100755
--- a/ui/src/hooks/useAssistantChat.ts
+++ b/ui/src/hooks/useAssistantChat.ts
@@ -269,6 +269,20 @@ export function useAssistantChat({
             break;
           }
 
+          case "rate_limited": {
+            // Show rate limit info as system message
+            setMessages((prev) => [
+              ...prev,
+              {
+                id: generateId(),
+                role: "system",
+                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
+                timestamp: new Date(),
+              },
+            ]);
+            break;
+          }
+
           case "pong": {
             // Keep-alive response, nothing to do
             break;
diff --git a/ui/src/hooks/useExpandChat.ts b/ui/src/hooks/useExpandChat.ts
index be632a54..ca00a68f 100644
--- a/ui/src/hooks/useExpandChat.ts
+++ b/ui/src/hooks/useExpandChat.ts
@@ -226,6 +226,20 @@ export function useExpandChat({
             break
           }
 
+          case 'rate_limited': {
+            // Show rate limit info as system message
+            setMessages((prev) => [
+              ...prev,
+              {
+                id: generateId(),
+                role: 'system',
+                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
+                timestamp: new Date(),
+              },
+            ])
+            break
+          }
+
           case 'pong': {
             // Keep-alive response, nothing to do
             break
diff --git a/ui/src/hooks/useSpecChat.ts b/ui/src/hooks/useSpecChat.ts
index 3bd09bb2..6a3a0189 100644
--- a/ui/src/hooks/useSpecChat.ts
+++ b/ui/src/hooks/useSpecChat.ts
@@ -322,6 +322,20 @@ export function useSpecChat({
             break
           }
 
+          case 'rate_limited': {
+            // Show rate limit info as system message
+            setMessages((prev) => [
+              ...prev,
+              {
+                id: generateId(),
+                role: 'system',
+                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
+                timestamp: new Date(),
+              },
+            ])
+            break
+          }
+
           case 'pong': {
             // Keep-alive response, nothing to do
             break
diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts
index 504888a4..d4fed179 100644
--- a/ui/src/lib/types.ts
+++ b/ui/src/lib/types.ts
@@ -407,6 +407,13 @@ export interface SpecChatResponseDoneMessage {
   type: 'response_done'
 }
 
+export interface SpecChatRateLimitedMessage { // Server notice that the request was rate limited and a retry is pending
+  type: 'rate_limited'
+  retry_in: number // Delay before the retry; the chat hooks render it with an "s" (seconds) suffix
+  attempt: number // Current retry attempt, shown to the user as the X in "attempt X/Y"
+  max_attempts: number // Retry limit, shown to the user as the Y in "attempt X/Y"
+}
+
 export type SpecChatServerMessage =
   | SpecChatTextMessage
   | SpecChatQuestionMessage
@@ -416,6 +423,7 @@ export type SpecChatServerMessage =
   | SpecChatErrorMessage
   | SpecChatPongMessage
   | SpecChatResponseDoneMessage
+  | SpecChatRateLimitedMessage
 
 // Image attachment for chat messages
 export interface ImageAttachment {
@@ -501,6 +509,13 @@ export interface AssistantChatPongMessage {
   type: 'pong'
 }
 
+export interface AssistantChatRateLimitedMessage { // Server notice that the request was rate limited and a retry is pending
+  type: 'rate_limited'
+  retry_in: number // Delay before the retry; useAssistantChat renders it with an "s" (seconds) suffix
+  attempt: number // Current retry attempt, shown to the user as the X in "attempt X/Y"
+  max_attempts: number // Retry limit, shown to the user as the Y in "attempt X/Y"
+}
+
 export type AssistantChatServerMessage =
   | AssistantChatTextMessage
   | AssistantChatToolCallMessage
@@ -509,6 +524,7 @@ export type AssistantChatServerMessage =
   | AssistantChatErrorMessage
   | AssistantChatConversationCreatedMessage
   | AssistantChatPongMessage
+  | AssistantChatRateLimitedMessage
 
 // ============================================================================
 // Expand Chat Types
@@ -532,6 +548,7 @@ export type ExpandChatServerMessage =
   | SpecChatErrorMessage       // Reuse error message type
   | SpecChatPongMessage        // Reuse pong message type
   | SpecChatResponseDoneMessage // Reuse response_done type
+  | SpecChatRateLimitedMessage // Reuse rate_limited message type
 
 // Bulk feature creation
 export interface FeatureBulkCreate {