diff --git a/README.md b/README.md index bd0eb5ab..f8d60acb 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,18 @@ A powerful, feature-rich command-line interface for interacting with Model Conte - **Multimodal page_fault**: Image pages return multi-block content (text + image_url) so multimodal models can re-analyze recalled images - **`/memory page --download`**: Export page content to local files with modality-aware extensions (.txt, .json, .png) +### Execution Plans (Tier 6) +- **`/plan` command**: Create, inspect, and execute reproducible tool call graphs — `create`, `list`, `show`, `run`, `delete`, `resume` +- **Model-driven planning (`--plan-tools`)**: The LLM autonomously creates and executes plans during conversation — no `/plan` command needed. It calls `plan_create_and_execute` when multi-step orchestration is required, and uses regular tools for simple tasks. Each step renders with real-time progress in the terminal +- **Parallel batch execution**: Independent plan steps run concurrently via topological batching (Kahn's BFS), with configurable `max_concurrency` +- **Variable resolution**: `${var}`, `${var.field}` nested access, and template strings like `"https://${api.host}/users"` — type-preserving for single refs +- **Dry-run mode**: Trace planned tool calls without executing — safe for production inspection +- **Checkpointing & resume**: Execution state persisted after each batch; resume interrupted plans with `/plan resume ` +- **Guard integration**: Plans respect existing budget, per-tool limits, and runaway detection guards +- **DAG visualization**: ASCII rendering with status indicators (○/◉/●/✗) and parallel markers (∥) +- **Re-planning**: Optional LLM-based re-planning on step failure (`enable_replan=True`) +- **Powered by**: [chuk-ai-planner](https://github.com/chrishayuk/chuk-ai-planner) graph-based plan DSL + ### MCP Apps (SEP-1865) - **Interactive HTML UIs**: MCP servers can serve interactive HTML applications (charts, tables, maps, 
markdown viewers) that render in your browser - **Sandboxed iframes**: Apps run in secure sandboxed iframes with CSP protection @@ -41,9 +53,9 @@ A powerful, feature-rich command-line interface for interacting with Model Conte ### Code Quality - **Core/UI Separation**: Core modules use `logging` only — no UI imports -- **3,200+ tests**: Comprehensive test suite with branch coverage, integration tests, and 60% minimum threshold +- **3,800+ tests**: Comprehensive test suite with branch coverage, integration tests, and 60% minimum threshold - **15 Architecture Principles**: Documented and enforced (see [architecture.md](architecture.md)) -- **Full [Roadmap](roadmap.md)**: Tiers 1-5 complete, Tiers 6-12 planned (plans, traces, skills, scheduling, multi-agent) +- **Full [Roadmap](roadmap.md)**: Tiers 1-6 complete, Tiers 7-12 planned (traces, memory scopes, skills, scheduling, multi-agent) ## 🔄 Architecture Overview @@ -108,6 +120,18 @@ MCP CLI supports all providers and models from CHUK-LLM, including cutting-edge - **Session Persistence**: Message queuing during disconnects, automatic reconnection, deferred tool result delivery - **structuredContent Support**: Full MCP spec compliance including structured content extraction and forwarding +### Execution Plans (Powered by chuk-ai-planner) +- **Plan Creation**: Generate execution plans from natural language descriptions using LLM-based plan agents +- **Model-Driven Planning**: With `--plan-tools`, the LLM autonomously decides when to plan — calls `plan_create_and_execute` for complex multi-step tasks, uses regular tools for simple ones +- **DAG Execution**: Plans are directed acyclic graphs — independent steps run in parallel batches, dependent steps wait +- **Variable Resolution**: Step outputs bind to variables (`result_variable`), referenced by later steps as `${var}` or `${var.field}` +- **Dry-Run Mode**: Trace what a plan would do without executing any tools — safe for production +- **Checkpointing**: Execution 
state saved after each batch; resume interrupted plans without re-running completed steps +- **Guard Integration**: Plans share budget and per-tool limits with the conversation — no bypass +- **Re-planning**: On step failure, optionally invoke the LLM to generate a revised plan for remaining work +- **DAG Visualization**: ASCII rendering shows dependency structure, batch grouping, and parallel markers +- **Persistence**: Plans stored as JSON at `~/.mcp-cli/plans/` + ### Advanced Configuration Management - **Environment Integration**: API keys and settings via environment variables - **File-based Config**: YAML and JSON configuration files @@ -136,6 +160,7 @@ Comprehensive documentation is available in the `docs/` directory: - **[Token Management](docs/TOKEN_MANAGEMENT.md)** - Comprehensive token management for providers and servers including OAuth, bearer tokens, and API keys ### Specialized Documentation +- **[Execution Plans](docs/PLANNING.md)** - Plan creation, parallel execution, variable resolution, checkpointing, guards, and re-planning - **[MCP Apps](docs/MCP_APPS.md)** - Interactive browser UIs served by MCP servers (SEP-1865) - **[OAuth Authentication](docs/OAUTH.md)** - OAuth flows, storage backends, and MCP server integration - **[Streaming Integration](docs/STREAMING.md)** - Real-time response streaming architecture diff --git a/docs/COMMANDS.md b/docs/COMMANDS.md index eec7bab1..e8bd26e4 100644 --- a/docs/COMMANDS.md +++ b/docs/COMMANDS.md @@ -331,6 +331,27 @@ mcp-cli --server sqlite --vm --vm-budget 500 mcp-cli --server sqlite --vm --vm-mode relaxed ``` +### Model-Driven Planning + +Enable the LLM to autonomously create and execute multi-step plans during conversation: + +```bash +# Enable plan tools — the model decides WHEN to plan +mcp-cli --server sqlite --plan-tools + +# Or with the chat subcommand +mcp-cli chat --server sqlite --plan-tools +``` + +When enabled, three internal tools are added to the LLM's tool list: +- `plan_create_and_execute` — 
generate and execute a plan in one call (most common) +- `plan_create` — generate a plan without executing it +- `plan_execute` — execute a previously created plan by ID + +The model calls these when it determines a task requires multi-step coordination (e.g., "read a file, find importers, run tests"). For simple tasks, it calls tools directly. + +See [PLANNING.md](./PLANNING.md) for full documentation. + ### Token Usage Track API token consumption across your conversation: diff --git a/docs/PLANNING.md b/docs/PLANNING.md new file mode 100644 index 00000000..7e0ce266 --- /dev/null +++ b/docs/PLANNING.md @@ -0,0 +1,469 @@ +# Execution Plans + +Execution plans are reproducible, inspectable, parallelizable tool call graphs. Instead of ad-hoc conversation-driven tool use, plans define a structured DAG of steps with explicit dependencies, variable bindings, and guard integration. + +**Powered by:** [chuk-ai-planner](https://github.com/chrishayuk/chuk-ai-planner) — graph-based plan DSL, executor, and LLM plan generation. 
+ +## Quick Start + +```bash +# In chat mode or interactive mode: +/plan create "read the auth module, find all usages, then refactor" +/plan list +/plan show +/plan run +/plan run --dry-run +/plan resume +/plan delete +``` + +## Architecture + +``` +User Intent + ↓ +PlanAgent (LLM) ← generates plan from description + ↓ +UniversalPlan ← graph-based plan object + ↓ +PlanRunner ← orchestrates execution + ↓ +McpToolBackend ← bridges to ToolManager + ↓ +MCP Servers ← actual tool execution +``` + +### Key Components + +| Component | File | Purpose | +|-----------|------|---------| +| `McpToolBackend` | `planning/backends.py` | Bridges chuk-ai-planner's `ToolExecutionBackend` protocol to `ToolManager.execute_tool()` | +| `PlanningContext` | `planning/context.py` | State container: graph store, plan registry, tool manager, tool catalog | +| `PlanRunner` | `planning/executor.py` | Orchestrates execution: batching, concurrency, checkpointing, dry-run, re-planning | +| `PlanCommand` | `commands/plan/plan.py` | Unified command interface (`/plan` in all modes) | +| `plan tools` | `planning/tools.py` | Tool definitions + handlers for model-driven planning (`--plan-tools`) | + +### Module Layout + +``` +src/mcp_cli/ + planning/ + __init__.py # Public API exports + backends.py # McpToolBackend + guard helpers + context.py # PlanningContext (state + registry) + executor.py # PlanRunner + batching + variables + DAG viz + tools.py # Plan-as-a-Tool: LLM-callable plan tools (--plan-tools) + commands/ + plan/ + plan.py # /plan command (create, list, show, run, delete, resume) +``` + +## Plan Format + +Plans are JSON objects stored at `~/.mcp-cli/plans/`: + +```json +{ + "id": "refactor-auth-001", + "title": "Refactor Auth Module", + "variables": { + "module_path": "src/auth/handler.py" + }, + "steps": [ + { + "index": "1", + "title": "Read auth module", + "tool": "read_file", + "args": {"path": "${module_path}"}, + "depends_on": [], + "result_variable": "auth_code" + }, + { + "index": 
"2", + "title": "Find all usages", + "tool": "search_code", + "args": {"query": "from auth.handler import"}, + "depends_on": [], + "result_variable": "usages" + }, + { + "index": "3", + "title": "Write refactored module", + "tool": "write_file", + "args": {"path": "${module_path}", "content": "refactored code"}, + "depends_on": ["1", "2"], + "result_variable": "write_result" + }, + { + "index": "4", + "title": "Run tests", + "tool": "run_tests", + "args": {"path": "tests/auth/"}, + "depends_on": ["3"], + "result_variable": "test_results" + } + ] +} +``` + +### Step Fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `index` | string | yes | Unique step identifier (1-based) | +| `title` | string | yes | Human-readable step description | +| `tool` | string | yes | MCP tool name to execute | +| `args` | dict | yes | Tool arguments (may contain `${var}` references) | +| `depends_on` | list[string] | yes | Indices of steps that must complete first | +| `result_variable` | string | no | Variable name to store the step's result | + +## Parallel Execution + +Steps are grouped into **topological batches** using Kahn's BFS algorithm. Steps within a batch have no dependencies on each other and run concurrently via `asyncio.gather()`. + +### Example: Diamond DAG + +``` +Step 1: Fetch data (no deps) +Step 2: Validate schema (depends on 1) +Step 3: Transform format (depends on 1) +Step 4: Aggregate results (depends on 2, 3) +``` + +Batches: +- **Batch 1**: Step 1 (serial — single step) +- **Batch 2**: Steps 2, 3 (parallel — both depend only on step 1) +- **Batch 3**: Step 4 (serial — waits for batch 2) + +With 200ms per tool call, the diamond executes in ~600ms instead of ~800ms serial. + +### Concurrency Control + +```python +runner = PlanRunner(context, max_concurrency=4) +``` + +The `max_concurrency` parameter limits how many steps run simultaneously within a batch using `asyncio.Semaphore`. Default is 4. 
+ +## Variable Resolution + +Step outputs can be stored as variables and referenced by later steps. + +### Patterns + +| Pattern | Behavior | Example | +|---------|----------|---------| +| `${var}` | Direct replacement (type-preserving) | `{"data": "${users}"}` → `{"data": [{"id": 1}, ...]}` | +| `${var.field}` | Nested dict access | `{"host": "${api.host}"}` → `{"host": "api.example.com"}` | +| `"text ${var} more"` | Template string interpolation | `"https://${api.host}/${api.version}/users"` → `"https://api.example.com/v2/users"` | + +**Type preservation:** A single `${var}` reference returns the original value (dict, list, int, etc.). Template strings with surrounding text always produce strings. + +### Plan Variables + +Plans can define initial variables in the `variables` field: + +```json +{ + "variables": { + "api": {"host": "api.example.com", "version": "v2"}, + "output_dir": "/tmp/results" + } +} +``` + +Variables can also be passed at execution time: + +```python +result = await runner.execute_plan(plan, variables={"date": "2026-03-01"}) +``` + +## Dry-Run Mode + +Trace what a plan would do without executing any tools: + +``` +/plan run --dry-run +``` + +In dry-run mode: +- Each step is logged with its tool name, resolved arguments, and dependencies +- Variables are simulated (bound to `` placeholders) +- No tools are executed — safe to run in production +- Returns a `PlanExecutionResult` with all step results marked as dry-run + +## Checkpointing & Resume + +Execution state is persisted after each batch to `~/.mcp-cli/plans/{id}_state.json`: + +```json +{ + "plan_id": "refactor-auth-001", + "status": "running", + "completed_steps": ["1", "2"], + "variables": { + "auth_code": "def handle_auth(request)...", + "usages": "Found 12 usages across 5 files..." 
+ } +} +``` + +If execution is interrupted (crash, Ctrl+C, step failure), resume with: + +``` +/plan resume +``` + +This loads the checkpoint, skips completed steps, and continues from where it left off with the saved variable context. + +## Guard Integration + +Plan execution respects mcp-cli's existing guard infrastructure: + +- **Pre-execution checks**: `ToolStateManager.check_all_guards()` runs before each step — budget limits, runaway detection, per-tool caps +- **Post-execution recording**: `ToolStateManager.record_tool_call()` tracks usage and updates value bindings +- **Shared budget**: Plan tool calls count against the same budget as conversation tool calls +- **Guard blocking**: If a guard blocks a step (e.g., budget exhausted), the step fails with a guard error message and the tool is never called + +Guards can be disabled for testing: + +```python +backend = McpToolBackend(tool_manager, enable_guards=False) +``` + +## Re-planning + +When enabled, the runner can invoke the LLM to generate a revised plan on step failure: + +```python +runner = PlanRunner( + context, + enable_replan=True, # Off by default + max_replans=2, # Max re-plan attempts +) +``` + +On failure: +1. The runner collects context: completed steps, failed step error, remaining steps, current variables +2. A `PlanAgent` generates a revised plan for the remaining work +3. The revised plan executes with the current variable context +4. Results are merged: completed steps from the original plan + steps from the revised plan +5. `PlanExecutionResult.replanned = True` indicates re-planning occurred + +Re-planning does not recurse — a revised plan that fails simply fails. + +## DAG Visualization + +Plans render as ASCII DAGs in the terminal: + +``` +○ 1. Read auth module [read_file] +○ 2. Find all usages [search_code] ∥ + +○ 3. Write refactored module [write_file] ← after: 1, 2 + +○ 4. 
Run tests [run_tests] ← after: 3 +``` + +Status indicators: +- `○` pending +- `◉` running +- `●` completed +- `✗` failed +- `∥` parallel (runs concurrently with other steps in the same batch) + +Use `render_plan_dag(plan_data)` programmatically: + +```python +from mcp_cli.planning.executor import render_plan_dag + +dag = render_plan_dag(plan_data) +print(dag) +``` + +## Programmatic API + +### PlanRunner + +```python +from mcp_cli.planning.context import PlanningContext +from mcp_cli.planning.executor import PlanRunner + +# Create context with a ToolManager +ctx = PlanningContext(tool_manager) + +# Create runner with options +runner = PlanRunner( + ctx, + on_step_start=lambda idx, title, tool: print(f" [{idx}] {title}"), + on_step_complete=lambda result: print(f" -> {'OK' if result.success else 'FAIL'}"), + enable_guards=True, + max_concurrency=4, + enable_replan=False, +) + +# Execute a plan +result = await runner.execute_plan(plan_data, dry_run=False, checkpoint=True) + +print(f"Success: {result.success}") +print(f"Steps: {len(result.steps)}") +print(f"Duration: {result.total_duration:.2f}s") +print(f"Variables: {list(result.variables.keys())}") +``` + +### McpToolBackend + +```python +from mcp_cli.planning.backends import McpToolBackend +from chuk_ai_planner.execution.models import ToolExecutionRequest + +backend = McpToolBackend(tool_manager, enable_guards=True) + +request = ToolExecutionRequest( + tool_name="read_file", + args={"path": "/tmp/test.txt"}, + step_id="step-1", +) +result = await backend.execute_tool(request) +``` + +### Batch Computation + +```python +from mcp_cli.planning.executor import _compute_batches + +steps = [ + {"index": "1", "title": "Fetch", "depends_on": []}, + {"index": "2", "title": "Parse A", "depends_on": ["1"]}, + {"index": "3", "title": "Parse B", "depends_on": ["1"]}, + {"index": "4", "title": "Merge", "depends_on": ["2", "3"]}, +] + +batches = _compute_batches(steps) +# [[step1], [step2, step3], [step4]] +``` + +## Model-Driven 
Planning (Plan as a Tool) + +With the `--plan-tools` flag, the LLM can autonomously create and execute plans during conversation. Instead of the user typing `/plan create`, the model itself decides when multi-step orchestration is needed. + +### Enabling + +```bash +# Enable plan tools in chat mode +mcp-cli --server sqlite --plan-tools + +# Or with the chat subcommand +mcp-cli chat --server sqlite --plan-tools +``` + +### How It Works + +Three internal tools are injected into the LLM's tool list: + +| Tool | Purpose | +|------|---------| +| `plan_create` | Generate a plan from a goal description, returns plan ID + step summary | +| `plan_execute` | Execute a previously created plan by ID | +| `plan_create_and_execute` | Generate and execute in one call (most common) | + +These tools are **intercepted** in `tool_processor.py` before MCP routing — the same pattern used by VM and memory tools. They never reach the MCP server. + +### Example Flow + +``` +User: "Read the auth module, find all files that import it, and run the tests" + +Model (internally): This needs 3 coordinated steps. + → calls plan_create_and_execute(goal="Read auth module, find importers, run tests") + → PlanAgent generates: [read_file] → [search_code] → [run_tests] + → PlanRunner executes all 3 steps + → Results returned as tool result + +Model: "The auth module contains handle_auth() and verify_jwt(). + It's imported in 6 files across src/ and tests/. + All 8 tests passed (2 skipped)." +``` + +For simple single-tool tasks, the model calls the tool directly — no planning overhead. + +### Display Integration + +Plan execution renders step-by-step in the terminal using the same `StreamingDisplayManager` as regular tool calls. 
Each MCP tool call within the plan gets its own spinner and result display: + +``` +✓ plan_create_and_execute completed in 17.08s + Result: Plan generated: Weather for Leavenheath (2 steps) +✓ geocode_location completed in 0.58s + Result keys: results, generationtime_ms +✓ get_weather_forecast completed in 0.43s + Result keys: latitude, longitude, elevation, ... +``` + +The `ui_manager` is passed through from `tool_processor.py` → `handle_plan_tool()` → `PlanRunner` callbacks, so the user sees real-time progress rather than a single long-running spinner. + +### Programmatic API + +```python +from mcp_cli.planning.tools import get_plan_tools_as_dicts, handle_plan_tool +from mcp_cli.planning.context import PlanningContext + +# Get OpenAI-format tool definitions +plan_tools = get_plan_tools_as_dicts() # 3 tool dicts + +# Execute a plan tool (ui_manager is optional, for step-by-step display) +ctx = PlanningContext(tool_manager) +result_json = await handle_plan_tool( + "plan_create_and_execute", + {"goal": "Read file and run tests"}, + ctx, + ui_manager=ui_manager, # optional: enables per-step progress +) +``` + +## Examples + +Self-contained demos in `examples/planning/` (no API key or MCP server needed): + +```bash +# Plan CRUD, DAG visualization, persistence +uv run python examples/planning/plan_basics_demo.py + +# Dry-run, live execution, variables, checkpoints, failure handling +uv run python examples/planning/plan_execution_demo.py + +# Topological batching, concurrent steps, timing evidence +uv run python examples/planning/plan_parallel_demo.py + +# Budget limits, per-tool caps, result recording, error handling +uv run python examples/planning/plan_guard_demo.py +``` + +### Model-Driven Planning Demo (requires OPENAI_API_KEY) + +```bash +# LLM decides WHEN to plan — uses plan_create_and_execute for complex tasks, +# calls tools directly for simple ones +uv run python examples/planning/plan_as_tool_demo.py + +# Use a different model +uv run python 
examples/planning/plan_as_tool_demo.py --model gpt-4o + +# Custom task description +uv run python examples/planning/plan_as_tool_demo.py --prompt "read the config, search for usages, and run tests" +``` + +## Tests + +200+ tests covering all planning functionality: + +```bash +# Run planning tests +uv run pytest tests/planning/ -v + +# Test files: +# tests/planning/test_backends.py — McpToolBackend, guards, result extraction +# tests/planning/test_context.py — PlanningContext, PlanRegistry round-trips +# tests/planning/test_executor.py — PlanRunner, batching, variables, DAG, re-planning +# tests/planning/test_tools.py — Plan-as-a-Tool definitions, validation, handlers +``` diff --git a/examples/README.md b/examples/README.md index 193f5dda..e7526a6a 100644 --- a/examples/README.md +++ b/examples/README.md @@ -12,6 +12,7 @@ examples/ commands/ Command system and slash commands servers/ Server management and custom providers apps/ MCP Apps (SEP-1865) interactive UI demos + planning/ Plan execution, parallel batches, guards safety/ Context safety mechanisms (Tier 1) sample_tools/ Reusable tool classes for demos ``` @@ -123,6 +124,61 @@ Demonstrates: 4. Host page serving a sandboxed iframe with WebSocket communication 5. Full `ui/initialize` handshake and tool call round-trip +## Planning (Tier 6) + +Execution plans — reproducible, inspectable, parallelizable tool call graphs. 
+ +### Self-Contained Demos (no API key needed) + +```bash +# Plan basics: create, inspect, save, load, delete, DAG visualization +uv run python examples/planning/plan_basics_demo.py + +# Plan execution: dry-run, live execution, variable resolution, checkpoints, failure handling +uv run python examples/planning/plan_execution_demo.py + +# Parallel execution: topological batching, concurrent steps, timing evidence +uv run python examples/planning/plan_parallel_demo.py + +# Guard integration: budget limits, per-tool caps, result recording, error handling +uv run python examples/planning/plan_guard_demo.py +``` + +### LLM-Integrated Demos (requires OPENAI_API_KEY) + +```bash +# Full pipeline: LLM generates plan from natural language → validate → visualize → execute +uv run python examples/planning/plan_llm_demo.py + +# Use a different model +uv run python examples/planning/plan_llm_demo.py --model gpt-4o + +# Custom task description +uv run python examples/planning/plan_llm_demo.py --prompt "fetch weather for 3 cities and compare" + +# Plan-as-a-Tool (Tier 6.8): The LLM decides WHEN to plan — uses plan_create_and_execute +# for complex multi-step tasks, calls tools directly for simple ones +uv run python examples/planning/plan_as_tool_demo.py + +# Custom task +uv run python examples/planning/plan_as_tool_demo.py --prompt "read the config and run tests" +``` + +Demonstrates: +1. PlanningContext initialization and PlanRegistry round-trips (save/load/delete) +2. DAG visualization with status indicators and parallel markers +3. Dry-run mode (trace without executing) +4. Parallel batch execution (independent steps run concurrently) +5. Variable resolution (`${var}`, `${var.field}`, template strings) +6. Execution checkpointing and resume support +7. Step failure handling with checkpoint persistence +8. Guard integration (pre-execution blocking, post-execution recording) +9. MCP content block extraction +10. 
Fan-out, diamond, and wide pipeline DAG patterns with timing evidence +11. LLM plan generation with PlanAgent (auto-retry on validation failure) +12. End-to-end pipeline: natural language → structured plan → parallel execution +13. Model-driven planning: LLM autonomously invokes plan_create_and_execute when tasks need multi-step coordination + ## Safety ### Tier 1: Context Safety diff --git a/examples/planning/plan_as_tool_demo.py b/examples/planning/plan_as_tool_demo.py new file mode 100644 index 00000000..2a910858 --- /dev/null +++ b/examples/planning/plan_as_tool_demo.py @@ -0,0 +1,440 @@ +#!/usr/bin/env python +""" +Plan-as-a-Tool demo — the LLM autonomously creates and executes plans. + +Demonstrates Tier 6.8: Model-Driven Planning. Instead of the user +explicitly requesting a plan, the model itself decides when a task +requires multi-step coordination and calls plan_create_and_execute. + +Flow: +1. User asks a complex question that needs multiple tool calls +2. LLM sees plan_create_and_execute alongside regular tools +3. LLM decides to create a plan and invokes the tool +4. planning/tools.py generates the plan via PlanAgent, then executes it +5. Results are returned to the LLM, which summarizes them for the user + +Requires OPENAI_API_KEY (uses gpt-4o-mini by default). 
+ +Usage: + uv run python examples/planning/plan_as_tool_demo.py + uv run python examples/planning/plan_as_tool_demo.py --model gpt-4o + uv run python examples/planning/plan_as_tool_demo.py --prompt "your task here" +""" + +from __future__ import annotations + +import argparse +import asyncio +import json +import os +import sys +import tempfile +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +# Load .env if available +try: + from dotenv import load_dotenv + + load_dotenv() +except ImportError: + pass + +# Add src to path for local development +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) + +from chuk_llm.llm.client import get_client +from mcp_cli.planning.context import PlanningContext +from mcp_cli.planning.tools import get_plan_tools_as_dicts, handle_plan_tool + + +# ── Mock ToolManager ──────────────────────────────────────────────────────── + + +@dataclass +class FakeToolCallResult: + tool_name: str + success: bool = True + result: Any = None + error: str | None = None + + +class MockToolManager: + """ToolManager with realistic tools for demonstration. + + The LLM uses these tools inside its generated plans. + Plan tools (plan_create_and_execute, etc.) are added separately + and intercepted before they reach this manager. 
+ """ + + TOOL_CATALOG = { + "read_file": { + "description": "Read a file and return its contents", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string", "description": "File path to read"}, + }, + "required": ["path"], + }, + }, + "write_file": { + "description": "Write content to a file", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string", "description": "File path to write to"}, + "content": {"type": "string", "description": "Content to write"}, + }, + "required": ["path", "content"], + }, + }, + "search_code": { + "description": "Search codebase for a pattern and return matching files/lines", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search pattern or regex", + }, + }, + "required": ["query"], + }, + }, + "run_tests": { + "description": "Run test suite for a given path and return pass/fail results", + "parameters": { + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Test path (directory or file)", + }, + }, + "required": ["path"], + }, + }, + "fetch_url": { + "description": "Fetch data from a URL and return the response body", + "parameters": { + "type": "object", + "properties": { + "url": {"type": "string", "description": "URL to fetch"}, + }, + "required": ["url"], + }, + }, + } + + MOCK_RESULTS = { + "read_file": "def handle_auth(request):\n token = request.headers.get('Authorization')\n if not token:\n raise AuthError('Missing token')\n return verify_jwt(token)", + "write_file": "Written 28 lines to file successfully", + "search_code": "Found 6 matches:\n src/auth.py:12 - from auth.handler import handle_auth\n src/routes.py:3 - from auth.handler import handle_auth\n src/middleware.py:8 - import auth.handler\n tests/test_auth.py:5 - from auth.handler import handle_auth\n tests/test_routes.py:11 - from auth.handler import handle_auth\n utils/decorators.py:3 - from auth.handler import 
verify_jwt", + "run_tests": "8 passed, 0 failed, 2 skipped in 1.34s", + "fetch_url": '{"users": [{"id": 1, "name": "Alice", "role": "admin"}, {"id": 2, "name": "Bob", "role": "user"}], "total": 2}', + } + + def __init__(self, *, delay: float = 0.05): + self._delay = delay + self.call_log: list[tuple[str, dict]] = [] + + @dataclass + class ToolInfo: + name: str + + def get_all_tools(self): + return [self.ToolInfo(name=n) for n in self.TOOL_CATALOG] + + async def get_adapted_tools_for_llm( + self, provider: str = "openai" + ) -> tuple[list[dict[str, Any]], dict]: + tools = [ + { + "type": "function", + "function": { + "name": name, + "description": info["description"], + "parameters": info["parameters"], + }, + } + for name, info in self.TOOL_CATALOG.items() + ] + return tools, {} + + async def execute_tool(self, tool_name, arguments, namespace=None, timeout=None): + await asyncio.sleep(self._delay) + self.call_log.append((tool_name, arguments)) + result = self.MOCK_RESULTS.get(tool_name, f"Result from {tool_name}") + return FakeToolCallResult(tool_name=tool_name, result=result) + + +# ── Helpers ────────────────────────────────────────────────────────────────── + + +def section(title: str) -> None: + print(f"\n{'=' * 60}") + print(f" {title}") + print(f"{'=' * 60}\n") + + +def build_conversation_tools(tm: MockToolManager) -> list[dict[str, Any]]: + """Build the full tool list the LLM sees: regular tools + plan tools.""" + regular_tools = [ + { + "type": "function", + "function": { + "name": name, + "description": info["description"], + "parameters": info["parameters"], + }, + } + for name, info in tm.TOOL_CATALOG.items() + ] + + plan_tools = get_plan_tools_as_dicts() + + return regular_tools + plan_tools + + +# ── Demo ───────────────────────────────────────────────────────────────────── + + +async def run_conversation( + model: str, + user_message: str, + tm: MockToolManager, + plans_dir: Path, +) -> None: + """Run a single conversation turn where the LLM may use 
plan tools.""" + print(f' User: "{user_message}"') + print() + + # Build the full tool list + all_tools = build_conversation_tools(tm) + + print(f" Tools available to LLM: {len(all_tools)}") + for t in all_tools: + name = t["function"]["name"] + desc = t["function"]["description"][:50] + print(f" - {name}: {desc}...") + print() + + # Create LLM client + client = get_client(provider="openai", model=model) + + system_prompt = ( + "You are a helpful assistant with access to tools. " + "When a task requires multiple coordinated steps (e.g., read a file, " + "then search for usages, then run tests), use plan_create_and_execute " + "to handle it efficiently. For simple single-tool tasks, call the tool " + "directly." + ) + + messages: list[dict[str, Any]] = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_message}, + ] + + # ── Turn 1: LLM decides what to do ── + print(" [1] Sending to LLM...") + completion = await client.create_completion( + messages=messages, + tools=all_tools, + tool_choice="auto", + ) + + assistant_content = completion.get("content", "") + tool_calls = completion.get("tool_calls", []) + + if not tool_calls: + print(f" LLM responded directly: {assistant_content}") + return + + # Add assistant message with tool calls + assistant_msg: dict[str, Any] = { + "role": "assistant", + "content": assistant_content or "", + } + assistant_msg["tool_calls"] = [ + { + "id": tc.get("id", f"call_{i}"), + "type": "function", + "function": { + "name": tc.get("function", {}).get("name", tc.get("name", "")), + "arguments": tc.get("function", {}).get( + "arguments", json.dumps(tc.get("arguments", {})) + ), + }, + } + for i, tc in enumerate(tool_calls) + ] + messages.append(assistant_msg) + + print(f" LLM chose {len(tool_calls)} tool call(s):") + + # ── Execute tool calls ── + for tc in tool_calls: + func = tc.get("function", {}) + tool_name = func.get("name", tc.get("name", "")) + raw_args = func.get("arguments", tc.get("arguments", 
"{}")) + call_id = tc.get("id", "call_0") + + if isinstance(raw_args, str): + try: + arguments = json.loads(raw_args) + except json.JSONDecodeError: + arguments = {} + else: + arguments = raw_args + + print(f"\n Tool: {tool_name}") + print(f" Args: {json.dumps(arguments, default=str)[:80]}") + + # Check if this is a plan tool (intercepted) + from mcp_cli.planning.tools import _PLAN_TOOL_NAMES + + if tool_name in _PLAN_TOOL_NAMES: + print(" -> Intercepted as plan tool!") + print() + + # Create PlanningContext + ctx = PlanningContext(tm, plans_dir=plans_dir) + + # Execute the plan tool + print(" [2] Executing plan tool...") + result_json = await handle_plan_tool( + tool_name, arguments, ctx, model_manager=None + ) + + result_data = json.loads(result_json) + + if result_data.get("success"): + print(f" Plan: {result_data.get('title', 'Untitled')}") + print( + f" Steps completed: {result_data.get('steps_completed', 0)}/{result_data.get('steps_total', 0)}" + ) + print(f" Duration: {result_data.get('duration', 0)}s") + + if result_data.get("steps"): + print("\n Step results:") + for step in result_data["steps"]: + status = "OK" if step.get("success") else "FAIL" + print( + f" [{step.get('index', '?')}] {step.get('title', '')} [{step.get('tool', '')}] -> {status}" + ) + + if result_data.get("results"): + print("\n Variable results:") + for key, value in result_data["results"].items(): + preview = str(value)[:60] + print(f" ${key} = {preview}") + else: + print(f" ERROR: {result_data.get('error', 'Unknown error')}") + + # Add tool result to messages + messages.append( + { + "role": "tool", + "tool_call_id": call_id, + "content": result_json, + } + ) + + else: + # Regular tool — execute directly via mock + result = await tm.execute_tool(tool_name, arguments) + result_text = ( + str(result.result) if result.success else f"Error: {result.error}" + ) + print(f" -> {result_text[:60]}") + + messages.append( + { + "role": "tool", + "tool_call_id": call_id, + "content": result_text, 
+ } + ) + + # ── Turn 2: LLM summarizes results ── + print("\n [3] LLM summarizing results...") + final = await client.create_completion(messages=messages) + + # Handle different response shapes from chuk_llm + final_content = final.get("content", "") or "" + if not final_content and "choices" in final: + choices = final["choices"] + if choices: + final_content = choices[0].get("message", {}).get("content", "") + + if final_content: + print(f"\n Assistant: {final_content}") + else: + print( + "\n (LLM returned empty summary — tool results above speak for themselves)" + ) + + +async def main() -> None: + parser = argparse.ArgumentParser(description="Plan-as-a-Tool demo (Tier 6.8)") + parser.add_argument( + "--model", + default="gpt-4o-mini", + help="OpenAI model (default: gpt-4o-mini)", + ) + parser.add_argument( + "--prompt", + default=None, + help="Custom user message (overrides built-in demos)", + ) + args = parser.parse_args() + + # Check for API key + if not os.getenv("OPENAI_API_KEY"): + print("ERROR: OPENAI_API_KEY environment variable is not set.") + print() + print("Set it in your shell:") + print(" export OPENAI_API_KEY=sk-...") + print() + print("Or create a .env file in the project root:") + print(" OPENAI_API_KEY=sk-...") + sys.exit(1) + + print() + print("=" * 60) + print(" Plan-as-a-Tool Demo (Tier 6.8)") + print(f" Model: {args.model}") + print(" The LLM decides WHEN to plan — not the user") + print("=" * 60) + + tm = MockToolManager() + + if args.prompt: + prompts = [args.prompt] + else: + prompts = [ + # Multi-step task — should trigger plan_create_and_execute + "Read the auth module at src/auth/handler.py, find all files that import it, and then run the tests to make sure everything passes.", + # Single-step task — should use the tool directly + "What's in the file src/auth/handler.py?", + ] + + with tempfile.TemporaryDirectory() as tmpdir: + for i, prompt in enumerate(prompts, 1): + section(f"Demo {i}") + await run_conversation( + 
model=args.model, + user_message=prompt, + tm=tm, + plans_dir=Path(tmpdir) / f"plans_{i}", + ) + + print(f"\n{'=' * 60}") + print(" Demo complete!") + print(f"{'=' * 60}\n") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/planning/plan_basics_demo.py b/examples/planning/plan_basics_demo.py new file mode 100644 index 00000000..831387a4 --- /dev/null +++ b/examples/planning/plan_basics_demo.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python +""" +Plan basics demo — create, inspect, save, load, and delete plans. + +Demonstrates: +1. PlanningContext initialization and plan persistence +2. Building a plan from a dict (both 'tool' and 'tool_calls' formats) +3. PlanRegistry round-trip: save to disk, load from disk, fresh context reload +4. DAG visualization with render_plan_dag() +5. Plan CRUD: list, get, delete + +No API key or MCP server needed — runs entirely with mocks. + +Usage: + uv run python examples/planning/plan_basics_demo.py +""" + +from __future__ import annotations + +import asyncio +import sys +import tempfile +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +# Add src to path for local development +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) + +from mcp_cli.planning.context import PlanningContext +from mcp_cli.planning.executor import render_plan_dag + + +# ── Mock ToolManager ──────────────────────────────────────────────────────── + + +@dataclass +class FakeToolInfo: + name: str + + +class MockToolManager: + """Minimal ToolManager stub for demos.""" + + TOOLS = ["read_file", "write_file", "search_code", "list_files", "run_tests"] + + async def get_all_tools(self): + return [FakeToolInfo(name=n) for n in self.TOOLS] + + async def get_adapted_tools_for_llm(self, provider: str) -> list[dict[str, Any]]: + return [ + {"type": "function", "function": {"name": n, "description": f"Tool: {n}"}} + for n in self.TOOLS + ] + + +# ── Demo Plans 
────────────────────────────────────────────────────────────── + +REFACTOR_PLAN = { + "title": "Refactor Auth Module", + "description": "Read the auth module, find all usages, then refactor", + "tags": ["refactor", "auth"], + "variables": {"module_path": "src/auth/handler.py"}, + "steps": [ + { + "index": "1", + "title": "Read auth module", + "tool": "read_file", + "args": {"path": "${module_path}"}, + "depends_on": [], + "result_variable": "auth_code", + }, + { + "index": "2", + "title": "Find all auth usages", + "tool": "search_code", + "args": {"query": "from auth.handler import"}, + "depends_on": [], + "result_variable": "usages", + }, + { + "index": "3", + "title": "List test files", + "tool": "list_files", + "args": {"pattern": "tests/auth/*.py"}, + "depends_on": [], + "result_variable": "test_files", + }, + { + "index": "4", + "title": "Write refactored module", + "tool": "write_file", + "args": {"path": "${module_path}", "content": "refactored code"}, + "depends_on": ["1", "2"], + "result_variable": "write_result", + }, + { + "index": "5", + "title": "Run auth tests", + "tool": "run_tests", + "args": {"path": "tests/auth/"}, + "depends_on": ["3", "4"], + "result_variable": "test_results", + }, + ], +} + +API_PLAN = { + "title": "Deploy API Endpoint", + "steps": [ + { + "title": "Read API spec", + "tool_calls": [ + {"id": "tc-1", "name": "read_file", "args": {"path": "api/spec.yaml"}} + ], + "depends_on": [], + "result_variable": "spec", + }, + { + "title": "Generate handler code", + "tool_calls": [ + { + "id": "tc-2", + "name": "write_file", + "args": {"path": "api/handler.py", "content": "..."}, + } + ], + "depends_on": ["1"], + "result_variable": "handler", + }, + { + "title": "Run integration tests", + "tool_calls": [ + {"id": "tc-3", "name": "run_tests", "args": {"path": "tests/api/"}} + ], + "depends_on": ["2"], + "result_variable": "test_result", + }, + ], +} + + +# ── Demo Runner ───────────────────────────────────────────────────────────── + + +def 
section(title: str) -> None: + print(f"\n{'=' * 60}") + print(f" {title}") + print(f"{'=' * 60}\n") + + +async def main() -> None: + print() + print("=" * 60) + print(" Plan Basics Demo") + print(" Create, inspect, save, load, and delete plans") + print("=" * 60) + + with tempfile.TemporaryDirectory() as tmpdir: + plans_dir = Path(tmpdir) / "plans" + tm = MockToolManager() + ctx = PlanningContext(tm, plans_dir=plans_dir) + + # ── 1. Tool Catalog ── + section("1. Tool Catalog") + names = await ctx.get_tool_names() + print(f" Available tools ({len(names)}):") + for name in names: + print(f" - {name}") + + # ── 2. DAG Visualization ── + section("2. DAG Visualization — Refactor Plan") + print(f" Plan: {REFACTOR_PLAN['title']}") + print(f" Tags: {REFACTOR_PLAN['tags']}") + print(f" Variables: {REFACTOR_PLAN['variables']}") + print() + dag = render_plan_dag(REFACTOR_PLAN) + print(dag) + print() + print(" Note: Steps 1-3 have no dependencies on each other") + print(" and will execute in parallel (same batch).") + + # ── 3. Save Plans ── + section("3. Save Plans to Registry") + plan_id_1 = await ctx.save_plan_from_dict(REFACTOR_PLAN) + print(f" Saved refactor plan: {plan_id_1[:12]}...") + + plan_id_2 = await ctx.save_plan_from_dict(API_PLAN) + print(f" Saved API plan: {plan_id_2[:12]}...") + + # ── 4. List Plans ── + section("4. List All Plans") + plans = await ctx.list_plans() + for p in plans: + step_count = len(p.get("steps", [])) + print(f" [{p['id'][:12]}...] {p['title']:<30} ({step_count} steps)") + + # ── 5. Load Plan ── + section("5. Load Plan by ID") + loaded = await ctx.get_plan(plan_id_1) + print(f" Title: {loaded['title']}") + print(f" Description: {loaded.get('description', 'N/A')}") + print(f" Steps: {len(loaded['steps'])}") + print(f" Variables: {loaded.get('variables', {})}") + + # ── 6. Persistence — Fresh Context ── + section("6. 
Persistence — Fresh Context Loads from Disk") + ctx2 = PlanningContext(tm, plans_dir=plans_dir) + reloaded = await ctx2.get_plan(plan_id_1) + print(f" Fresh context found plan: {reloaded is not None}") + print( + f" Title matches: {reloaded['title'] == REFACTOR_PLAN['title']}" + ) + + # ── 7. DAG with Status Indicators ── + section("7. DAG with Status Indicators") + status_plan = { + "steps": [ + { + "index": "1", + "title": "Read source", + "tool": "read_file", + "_status": "completed", + }, + { + "index": "2", + "title": "Search usages", + "tool": "search_code", + "_status": "completed", + }, + { + "index": "3", + "title": "Refactor code", + "tool": "write_file", + "_status": "running", + "depends_on": ["1", "2"], + }, + { + "index": "4", + "title": "Run tests", + "tool": "run_tests", + "_status": "pending", + "depends_on": ["3"], + }, + ] + } + dag = render_plan_dag(status_plan) + print(dag) + print() + print( + " Legend: \u25cf completed \u25c9 running \u25cb pending \u2717 failed" + ) + + # ── 8. Delete Plan ── + section("8. Delete Plan") + deleted = await ctx.delete_plan(plan_id_2) + print(f" Deleted API plan: {deleted}") + remaining = await ctx.list_plans() + print(f" Plans remaining: {len(remaining)}") + + # ── 9. tool_calls Format ── + section("9. tool_calls Format (from PlanRegistry)") + dag = render_plan_dag(API_PLAN) + print(f" Plan: {API_PLAN['title']}") + print() + print(dag) + + print(f"\n{'=' * 60}") + print(" Demo complete!") + print(f"{'=' * 60}\n") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/planning/plan_execution_demo.py b/examples/planning/plan_execution_demo.py new file mode 100644 index 00000000..0f86b115 --- /dev/null +++ b/examples/planning/plan_execution_demo.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python +""" +Plan execution demo — run plans with parallel batches, checkpoints, and dry-run. + +Demonstrates: +1. Dry-run mode: trace execution without side effects +2. 
Live execution with mock tools and progress callbacks +3. Parallel batch execution: independent steps run concurrently +4. Variable resolution: ${var}, ${var.field}, template strings +5. Execution checkpointing and resume +6. Step failure handling + +No API key or MCP server needed — runs entirely with mocks. + +Usage: + uv run python examples/planning/plan_execution_demo.py +""" + +from __future__ import annotations + +import asyncio +import json +import sys +import tempfile +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) + +from mcp_cli.planning.context import PlanningContext +from mcp_cli.planning.executor import PlanRunner, render_plan_dag + + +# ── Mock ToolManager ──────────────────────────────────────────────────────── + + +@dataclass +class FakeToolCallResult: + tool_name: str + success: bool = True + result: Any = None + error: str | None = None + + +class MockToolManager: + """ToolManager stub that simulates tool execution with realistic results.""" + + TOOLS = { + "read_file": "def handle_auth(request):\n token = request.headers.get('Authorization')\n return verify(token)", + "search_code": "Found 12 usages across 5 files:\n - api/routes.py:14\n - api/middleware.py:8\n - tests/test_auth.py:23\n - tests/test_routes.py:41\n - utils/decorators.py:7", + "list_files": "tests/auth/test_handler.py\ntests/auth/test_middleware.py\ntests/auth/conftest.py", + "write_file": "Written 42 lines to file", + "run_tests": "5 tests passed, 0 failed", + "fetch_url": '{"users": [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]}', + "process_data": "Processed 2 records", + } + + def __init__(self, *, fail_tools: set[str] | None = None, delay: float = 0.05): + self._fail_tools = fail_tools or set() + self._delay = delay + self.call_log: list[tuple[str, dict]] = [] + + @dataclass + class ToolInfo: + name: str + + def get_all_tools(self): + return 
[self.ToolInfo(name=n) for n in self.TOOLS] + + async def execute_tool(self, tool_name, arguments, namespace=None, timeout=None): + await asyncio.sleep(self._delay) # Simulate network latency + self.call_log.append((tool_name, arguments)) + + if tool_name in self._fail_tools: + return FakeToolCallResult( + tool_name=tool_name, + success=False, + error=f"Connection refused: {tool_name} server is down", + ) + + result = self.TOOLS.get(tool_name, f"Result from {tool_name}") + return FakeToolCallResult(tool_name=tool_name, result=result) + + +# ── Demo Plans ────────────────────────────────────────────────────────────── + +REFACTOR_PLAN = { + "id": "refactor-auth-001", + "title": "Refactor Auth Module", + "variables": {"module_path": "src/auth/handler.py"}, + "steps": [ + { + "index": "1", + "title": "Read auth module", + "tool": "read_file", + "args": {"path": "${module_path}"}, + "depends_on": [], + "result_variable": "auth_code", + }, + { + "index": "2", + "title": "Find all auth usages", + "tool": "search_code", + "args": {"query": "from auth.handler import"}, + "depends_on": [], + "result_variable": "usages", + }, + { + "index": "3", + "title": "List test files", + "tool": "list_files", + "args": {"pattern": "tests/auth/*.py"}, + "depends_on": [], + "result_variable": "test_files", + }, + { + "index": "4", + "title": "Write refactored module", + "tool": "write_file", + "args": {"path": "${module_path}", "content": "refactored code"}, + "depends_on": ["1", "2"], + "result_variable": "write_result", + }, + { + "index": "5", + "title": "Run auth tests", + "tool": "run_tests", + "args": {"path": "tests/auth/"}, + "depends_on": ["3", "4"], + "result_variable": "test_results", + }, + ], +} + +API_PIPELINE = { + "id": "api-pipeline-001", + "title": "API Data Pipeline", + "variables": { + "api": {"host": "api.example.com", "version": "v2"}, + }, + "steps": [ + { + "index": "1", + "title": "Fetch users", + "tool": "fetch_url", + "args": {"url": 
"https://${api.host}/${api.version}/users"}, + "depends_on": [], + "result_variable": "users", + }, + { + "index": "2", + "title": "Process user data", + "tool": "process_data", + "args": {"data": "${users}"}, + "depends_on": ["1"], + "result_variable": "processed", + }, + ], +} + + +# ── Demo Runner ───────────────────────────────────────────────────────────── + + +def section(title: str) -> None: + print(f"\n{'=' * 60}") + print(f" {title}") + print(f"{'=' * 60}\n") + + +def make_callbacks(): + """Create progress callbacks that print step progress.""" + + def on_start(index, title, tool_name): + print(f" [{index}] {title} [{tool_name}]...") + + def on_complete(step_result): + if step_result.success: + result_preview = str(step_result.result)[:60] + print(f" -> OK ({step_result.duration:.2f}s): {result_preview}") + else: + print( + f" -> FAIL ({step_result.duration:.2f}s): {step_result.error}" + ) + + return on_start, on_complete + + +async def main() -> None: + print() + print("=" * 60) + print(" Plan Execution Demo") + print(" Parallel batches, checkpoints, dry-run, variable resolution") + print("=" * 60) + + with tempfile.TemporaryDirectory() as tmpdir: + plans_dir = Path(tmpdir) / "plans" + + # ── 1. Dry Run ── + section("1. Dry-Run Mode") + print(" Trace plan execution without running any tools:\n") + + tm = MockToolManager() + ctx = PlanningContext(tm, plans_dir=plans_dir) + on_start, on_complete = make_callbacks() + + runner = PlanRunner( + ctx, + on_step_start=on_start, + on_step_complete=on_complete, + enable_guards=False, + ) + + result = await runner.execute_plan( + REFACTOR_PLAN, dry_run=True, checkpoint=False + ) + + print(f"\n Result: {'SUCCESS' if result.success else 'FAILED'}") + print(f" Simulated variables: {list(result.variables.keys())}") + print(f" Tools called: {len(tm.call_log)} (should be 0 in dry-run)") + + # ── 2. DAG View ── + section("2. 
Execution DAG") + print(f" Plan: {REFACTOR_PLAN['title']}") + print(" Batch structure:\n") + dag = render_plan_dag(REFACTOR_PLAN) + print(dag) + print() + print(" Batch 1: Steps 1, 2, 3 (parallel - no deps)") + print(" Batch 2: Step 4 (depends on 1, 2)") + print(" Batch 3: Step 5 (depends on 3, 4)") + + # ── 3. Live Execution ── + section("3. Live Execution with Parallel Batches") + print(" Executing refactor plan with mock tools:\n") + + tm2 = MockToolManager() + ctx2 = PlanningContext(tm2, plans_dir=plans_dir) + on_start2, on_complete2 = make_callbacks() + + runner2 = PlanRunner( + ctx2, + on_step_start=on_start2, + on_step_complete=on_complete2, + enable_guards=False, + ) + + result = await runner2.execute_plan(REFACTOR_PLAN, checkpoint=True) + + print(f"\n Result: {'SUCCESS' if result.success else 'FAILED'}") + print(f" Steps: {len(result.steps)}") + print(f" Duration: {result.total_duration:.2f}s") + print(f" Tools called: {len(tm2.call_log)}") + print() + print(" Variable bindings:") + for key, value in result.variables.items(): + preview = str(value)[:50] + print(f" ${key} = {preview}") + + # ── 4. Variable Resolution ── + section("4. Variable Resolution (Nested + Templates)") + print(f" Plan: {API_PIPELINE['title']}") + print(f" Variables: {json.dumps(API_PIPELINE['variables'], indent=4)}") + print() + print(' Step 1 args: url = "https://${api.host}/${api.version}/users"') + print(' Step 2 args: data = "${users}"') + print() + + tm3 = MockToolManager() + ctx3 = PlanningContext(tm3, plans_dir=plans_dir) + runner3 = PlanRunner(ctx3, enable_guards=False) + + result = await runner3.execute_plan(API_PIPELINE, checkpoint=False) + + print(" Executed. Tool calls:") + for tool_name, args in tm3.call_log: + print(f" {tool_name}({json.dumps(args, default=str)[:60]})") + + # ── 5. Checkpoint & Resume ── + section("5. 
Execution Checkpointing") + + checkpoint_path = plans_dir / "refactor-auth-001_state.json" + if checkpoint_path.exists(): + data = json.loads(checkpoint_path.read_text()) + print(f" Checkpoint file: {checkpoint_path.name}") + print(f" Status: {data['status']}") + print(f" Completed steps: {data['completed_steps']}") + print(f" Variables saved: {list(data['variables'].keys())}") + else: + print(" No checkpoint found (checkpoint=False was used)") + + # ── 6. Step Failure ── + section("6. Step Failure Handling") + print(" Executing with 'write_file' tool failing:\n") + + tm4 = MockToolManager(fail_tools={"write_file"}) + ctx4 = PlanningContext(tm4, plans_dir=Path(tmpdir) / "plans2") + on_start4, on_complete4 = make_callbacks() + + runner4 = PlanRunner( + ctx4, + on_step_start=on_start4, + on_step_complete=on_complete4, + enable_guards=False, + ) + + result = await runner4.execute_plan(REFACTOR_PLAN, checkpoint=True) + + print(f"\n Result: {'SUCCESS' if result.success else 'FAILED'}") + print(f" Error: {result.error}") + print(f" Steps completed: {sum(1 for s in result.steps if s.success)}") + print(f" Steps failed: {sum(1 for s in result.steps if not s.success)}") + + # Check failure checkpoint + fail_ckpt = Path(tmpdir) / "plans2" / "refactor-auth-001_state.json" + if fail_ckpt.exists(): + data = json.loads(fail_ckpt.read_text()) + print("\n Failure checkpoint saved:") + print(f" Status: {data['status']}") + print(f" Completed steps: {data['completed_steps']}") + print(" (Resume with: /plan resume refactor-auth-001)") + + print(f"\n{'=' * 60}") + print(" Demo complete!") + print(f"{'=' * 60}\n") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/planning/plan_guard_demo.py b/examples/planning/plan_guard_demo.py new file mode 100644 index 00000000..e3ee7996 --- /dev/null +++ b/examples/planning/plan_guard_demo.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python +""" +Guard integration demo — shows how plans respect budget and per-tool limits. 
+ +Demonstrates: +1. McpToolBackend with guard checks enabled +2. Pre-execution guard blocking (budget exhausted, per-tool cap) +3. Post-execution result recording (value binding, tool count tracking) +4. Guards disabled mode (bypass all checks) +5. Result extraction from MCP content blocks + +No API key, MCP server, or chuk_ai_session_manager needed — uses mocks. + +Usage: + uv run python examples/planning/plan_guard_demo.py +""" + +from __future__ import annotations + +import asyncio +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Any +from unittest.mock import patch + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) + +from chuk_ai_planner.execution.models import ToolExecutionRequest +from mcp_cli.planning.backends import McpToolBackend, _extract_result + + +# ── Mock ToolManager ──────────────────────────────────────────────────────── + + +@dataclass +class FakeToolCallResult: + tool_name: str + success: bool = True + result: Any = None + error: str | None = None + + +class MockToolManager: + """Minimal ToolManager for demos.""" + + def __init__(self, results: dict[str, Any] | None = None): + self._results = results or {} + self.calls: list[tuple[str, dict]] = [] + + async def execute_tool(self, tool_name, arguments, namespace=None, timeout=None): + self.calls.append((tool_name, arguments)) + result = self._results.get(tool_name, f"result from {tool_name}") + return FakeToolCallResult(tool_name=tool_name, result=result) + + +# ── Demo Runner ───────────────────────────────────────────────────────────── + + +def section(title: str) -> None: + print(f"\n{'=' * 60}") + print(f" {title}") + print(f"{'=' * 60}\n") + + +async def main() -> None: + print() + print("=" * 60) + print(" Guard Integration Demo") + print(" Budget, per-tool limits, and result recording") + print("=" * 60) + + # ── 1. Basic Execution (Guards Disabled) ── + section("1. 
Basic Execution (Guards Disabled)") + + tm = MockToolManager({"read_file": "Hello, World!"}) + backend = McpToolBackend(tm, enable_guards=False) + + request = ToolExecutionRequest( + tool_name="read_file", + args={"path": "/tmp/test.txt"}, + step_id="step-1", + ) + result = await backend.execute_tool(request) + + print(f" Tool: {result.tool_name}") + print(f" Success: {result.success}") + print(f" Result: {result.result}") + print(f" Duration: {result.duration:.4f}s") + print(f" Error: {result.error}") + + # ── 2. Guard Blocks Execution ── + section("2. Guard Blocks Execution") + print(" Simulating budget exhausted guard block:\n") + + tm2 = MockToolManager({"write_file": "should not see this"}) + backend2 = McpToolBackend(tm2, enable_guards=True) + + with patch( + "mcp_cli.planning.backends._check_guards", + return_value="Budget exhausted: $12.50 of $10.00 limit used", + ): + request2 = ToolExecutionRequest( + tool_name="write_file", + args={"path": "/tmp/output.txt", "content": "data"}, + step_id="step-2", + ) + result2 = await backend2.execute_tool(request2) + + print(f" Tool: {result2.tool_name}") + print(f" Success: {result2.success}") + print(f" Error: {result2.error}") + print(f" Result: {result2.result}") + print(f" Tool was called: {len(tm2.calls) > 0}") + print("\n The tool was never executed — guard blocked it pre-flight.") + + # ── 3. Guard Allows + Records ── + section("3. 
Guard Allows + Records Result") + print(" Simulating guard allowing execution and recording result:\n") + + tm3 = MockToolManager({"search_code": "Found 5 matches"}) + backend3 = McpToolBackend(tm3, enable_guards=True) + + record_calls = [] + + with ( + patch( + "mcp_cli.planning.backends._check_guards", + return_value=None, # Guard allows + ), + patch( + "mcp_cli.planning.backends._record_result", + side_effect=lambda *args: record_calls.append(args), + ), + ): + request3 = ToolExecutionRequest( + tool_name="search_code", + args={"query": "def main"}, + step_id="step-3", + ) + result3 = await backend3.execute_tool(request3) + + print(f" Tool: {result3.tool_name}") + print(f" Success: {result3.success}") + print(f" Result: {result3.result}") + print(f" Record called: {len(record_calls) > 0}") + if record_calls: + tool, args, res = record_calls[0] + print(f" Recorded: tool={tool}, args={args}, result={res}") + + # ── 4. Namespace Prefix ── + section("4. Namespace Prefix") + print(" When a namespace is set, tools are called with a prefix:\n") + + tm4 = MockToolManager({"filesystem__read_file": "file contents"}) + backend4 = McpToolBackend(tm4, namespace="filesystem", enable_guards=False) + + request4 = ToolExecutionRequest( + tool_name="read_file", + args={"path": "/tmp/x"}, + step_id="step-4", + ) + result4 = await backend4.execute_tool(request4) + + print(" Request tool: read_file") + print(f" Actual call: {tm4.calls[0][0]}") + print(f" Result tool: {result4.tool_name}") + print(f" Success: {result4.success}") + + # ── 5. Error Handling ── + section("5. 
Tool Error Handling") + print(" Backend catches exceptions from ToolManager:\n") + + class ExplodingToolManager: + async def execute_tool(self, *args, **kwargs): + raise ConnectionError("MCP server connection refused") + + backend5 = McpToolBackend(ExplodingToolManager(), enable_guards=False) + + request5 = ToolExecutionRequest( + tool_name="ping", + args={}, + step_id="step-5", + ) + result5 = await backend5.execute_tool(request5) + + print(f" Success: {result5.success}") + print(f" Error: {result5.error}") + print(f" Duration: {result5.duration:.4f}s") + print("\n Exception caught and wrapped — no crash.") + + # ── 6. Result Extraction ── + section("6. MCP Content Block Extraction") + print(" _extract_result normalizes MCP-style content blocks:\n") + + examples = [ + ("None", None), + ("String", "hello world"), + ("Dict", {"key": "value"}), + ("Single text block", [{"type": "text", "text": "result data"}]), + ( + "Multiple text blocks", + [ + {"type": "text", "text": "line 1"}, + {"type": "text", "text": "line 2"}, + ], + ), + ( + "Mixed blocks (image + text)", + [ + {"type": "image", "url": "http://example.com/img.png"}, + {"type": "text", "text": "caption"}, + ], + ), + ("List of strings", ["a", "b", "c"]), + ] + + for label, raw in examples: + extracted = _extract_result(raw) + display = repr(extracted) + if len(display) > 50: + display = display[:50] + "..." + print(f" {label:<30} -> {display}") + + print(f"\n{'=' * 60}") + print(" Demo complete!") + print(f"{'=' * 60}\n") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/planning/plan_llm_demo.py b/examples/planning/plan_llm_demo.py new file mode 100644 index 00000000..f1ed43a5 --- /dev/null +++ b/examples/planning/plan_llm_demo.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python +""" +LLM-integrated planning demo — generate plans from natural language. + +Demonstrates the full planning pipeline: +1. PlanAgent generates a structured plan from a natural language description +2. 
Plan validation against available tool catalog +3. DAG visualization of the generated plan +4. Live execution with mock tools and progress callbacks +5. Retry loop: if the LLM produces invalid steps, PlanAgent auto-corrects + +Requires an OpenAI API key (OPENAI_API_KEY environment variable or .env file). +Uses gpt-4o-mini by default for fast, cheap plan generation. + +Usage: + uv run python examples/planning/plan_llm_demo.py + uv run python examples/planning/plan_llm_demo.py --model gpt-4o + uv run python examples/planning/plan_llm_demo.py --prompt "fetch weather for 3 cities and compare" +""" + +from __future__ import annotations + +import argparse +import asyncio +import json +import os +import sys +import tempfile +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +# Load .env if available (for OPENAI_API_KEY) +try: + from dotenv import load_dotenv + + load_dotenv() +except ImportError: + pass + +# Add src to path for local development +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) + +from mcp_cli.planning.context import PlanningContext +from mcp_cli.planning.executor import PlanRunner, render_plan_dag + + +# ── Mock ToolManager ──────────────────────────────────────────────────────── + + +@dataclass +class FakeToolCallResult: + tool_name: str + success: bool = True + result: Any = None + error: str | None = None + + +class MockToolManager: + """ToolManager with a realistic tool catalog for LLM plan generation. + + The LLM sees the tool names and generates plans using them. + Execution returns mock results so no real MCP server is needed. 
+ """ + + TOOL_CATALOG = { + "read_file": "Read a file and return its contents", + "write_file": "Write content to a file", + "list_files": "List files matching a glob pattern", + "search_code": "Search codebase for a pattern", + "run_tests": "Run test suite and return results", + "fetch_url": "Fetch data from a URL", + "execute_query": "Execute a database query", + "send_notification": "Send a notification message", + } + + MOCK_RESULTS = { + "read_file": "def handle_request(req):\n return process(req.data)", + "write_file": "Written 35 lines to file", + "list_files": "src/auth.py\nsrc/routes.py\nsrc/middleware.py", + "search_code": "Found 8 matches:\n src/auth.py:12\n src/routes.py:45\n tests/test_auth.py:7", + "run_tests": "12 passed, 0 failed, 0 skipped", + "fetch_url": '{"status": "ok", "data": [{"id": 1, "name": "Alice"}]}', + "execute_query": "3 rows returned", + "send_notification": "Notification sent successfully", + } + + def __init__(self, *, delay: float = 0.05): + self._delay = delay + self.call_log: list[tuple[str, dict]] = [] + + @dataclass + class ToolInfo: + name: str + + def get_all_tools(self): + return [self.ToolInfo(name=n) for n in self.TOOL_CATALOG] + + async def execute_tool(self, tool_name, arguments, namespace=None, timeout=None): + await asyncio.sleep(self._delay) + self.call_log.append((tool_name, arguments)) + result = self.MOCK_RESULTS.get(tool_name, f"Result from {tool_name}") + return FakeToolCallResult(tool_name=tool_name, result=result) + + +# ── Plan Generation ───────────────────────────────────────────────────────── + + +def build_system_prompt(tool_names: list[str]) -> str: + """Build the system prompt that tells the LLM what tools are available.""" + tools_list = "\n".join(f" - {name}" for name in tool_names) + return f"""You are a planning assistant. Given a task description, create a structured execution plan. 
+ +Available tools: +{tools_list} + +Output a JSON object with this exact structure: +{{ + "title": "Short plan title", + "steps": [ + {{ + "title": "What this step does", + "tool": "tool_name", + "args": {{"arg1": "value1"}}, + "depends_on": [], + "result_variable": "optional_var_name" + }} + ] +}} + +Rules: +- Only use tools from the available tools list above +- depends_on is a list of step indices (1-based) that must complete first +- result_variable stores the output for use in later steps as ${{var_name}} +- Keep plans focused — prefer fewer, targeted steps over many small ones +- Each step should have exactly one tool call +- Steps with no dependencies can run in parallel""" + + +def validate_step(step: dict, tool_names: list[str]) -> tuple[bool, str]: + """Validate a single plan step against the tool catalog.""" + tool = step.get("tool", "") + if tool not in tool_names: + return False, f"Unknown tool: {tool}. Available: {', '.join(tool_names)}" + if not step.get("title"): + return False, "Step must have a title" + return True, "" + + +# ── Demo Runner ───────────────────────────────────────────────────────────── + + +def section(title: str) -> None: + print(f"\n{'=' * 60}") + print(f" {title}") + print(f"{'=' * 60}\n") + + +async def main() -> None: + parser = argparse.ArgumentParser(description="LLM plan generation demo") + parser.add_argument( + "--model", + default="gpt-4o-mini", + help="OpenAI model for plan generation (default: gpt-4o-mini)", + ) + parser.add_argument( + "--prompt", + default=None, + help="Custom task description (overrides built-in demos)", + ) + args = parser.parse_args() + + # Check for API key + if not os.getenv("OPENAI_API_KEY"): + print("ERROR: OPENAI_API_KEY environment variable is not set.") + print() + print("Set it in your shell:") + print(" export OPENAI_API_KEY=sk-...") + print() + print("Or create a .env file in the project root:") + print(" OPENAI_API_KEY=sk-...") + sys.exit(1) + + print() + print("=" * 60) + print(" LLM 
Plan Generation Demo") + print(f" Model: {args.model}") + print("=" * 60) + + # Set up mock tool manager with realistic tool catalog + tm = MockToolManager() + tool_names = [t.name for t in tm.get_all_tools()] + + print(f"\n Available tools ({len(tool_names)}):") + for name in tool_names: + desc = MockToolManager.TOOL_CATALOG[name] + print(f" - {name}: {desc}") + + # Import PlanAgent + from chuk_ai_planner.agents.plan_agent import PlanAgent + + # Build prompts to demo + if args.prompt: + prompts = [args.prompt] + else: + prompts = [ + "Read the auth module, find all places that import it, and then run the tests", + "Fetch user data from the API, save it to a file, and send a notification", + ] + + with tempfile.TemporaryDirectory() as tmpdir: + for i, user_prompt in enumerate(prompts, 1): + section(f"Demo {i}: LLM Plan Generation") + print(f' Prompt: "{user_prompt}"\n') + + # ── Step 1: Generate plan with PlanAgent ── + print(" [1/4] Generating plan with LLM...\n") + + system_prompt = build_system_prompt(tool_names) + agent = PlanAgent( + system_prompt=system_prompt, + validate_step=lambda step: validate_step(step, tool_names), + model=args.model, + max_retries=3, + ) + + try: + plan_dict = await agent.plan(user_prompt) + except RuntimeError as e: + print(f" FAILED: {e}") + continue + + # Show retry history + if len(agent.history) > 1: + print( + f" (PlanAgent needed {len(agent.history)} attempts " + f"to produce a valid plan)\n" + ) + + print(f" Title: {plan_dict.get('title', 'Untitled')}") + print(f" Steps: {len(plan_dict.get('steps', []))}") + print() + + # ── Step 2: Show generated plan ── + print(" [2/4] Generated plan:\n") + for step in plan_dict.get("steps", []): + idx = step.get("index", "?") + title = step.get("title", "Untitled") + tool = step.get("tool", "?") + deps = step.get("depends_on", []) + args_preview = json.dumps(step.get("args", {}), default=str) + if len(args_preview) > 60: + args_preview = args_preview[:57] + "..." 
+ dep_str = ( + f" (after: {', '.join(str(d) for d in deps)})" if deps else "" + ) + print(f" Step {idx}: {title}") + print(f" tool: {tool}({args_preview}){dep_str}") + + # ── Step 3: DAG visualization ── + print("\n [3/4] Execution DAG:\n") + + # Ensure steps have index fields for DAG rendering + for j, step in enumerate(plan_dict.get("steps", []), 1): + if "index" not in step: + step["index"] = str(j) + + dag = render_plan_dag(plan_dict) + print(dag) + + # ── Step 4: Execute the plan ── + print("\n [4/4] Executing plan with mock tools:\n") + + ctx = PlanningContext(tm, plans_dir=Path(tmpdir) / f"plans_{i}") + + def on_start(index, title, tool_name): + print(f" [{index}] {title} [{tool_name}]...") + + def on_complete(step_result): + if step_result.success: + preview = str(step_result.result)[:50] + print(f" -> OK ({step_result.duration:.2f}s): {preview}") + else: + print(f" -> FAIL: {step_result.error}") + + runner = PlanRunner( + ctx, + on_step_start=on_start, + on_step_complete=on_complete, + enable_guards=False, + ) + + result = await runner.execute_plan(plan_dict, checkpoint=False) + + print(f"\n Result: {'SUCCESS' if result.success else 'FAILED'}") + print(f" Steps: {len(result.steps)}") + print(f" Duration: {result.total_duration:.2f}s") + + if result.variables: + print(" Variables:") + for key, value in result.variables.items(): + preview = str(value)[:50] + print(f" ${key} = {preview}") + + # Show raw LLM output for transparency + print("\n LLM generation history:") + for record in agent.history: + attempt = record["attempt"] + errors = record.get("errors", []) + status = "valid" if not errors else f"errors: {errors}" + print(f" Attempt {attempt}: {status}") + + print(f"\n{'=' * 60}") + print(" Demo complete!") + print(f"{'=' * 60}\n") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/planning/plan_parallel_demo.py b/examples/planning/plan_parallel_demo.py new file mode 100644 index 00000000..3314d918 --- /dev/null +++ 
b/examples/planning/plan_parallel_demo.py @@ -0,0 +1,401 @@ +#!/usr/bin/env python +""" +Parallel execution demo — shows how independent steps run concurrently. + +Demonstrates: +1. Topological batching: steps grouped by dependency structure +2. Concurrent execution within batches via asyncio +3. Diamond, fan-out, and pipeline DAG patterns +4. Timing evidence that parallel steps run concurrently +5. DAG visualization with parallel markers + +No API key or MCP server needed — runs entirely with mocks. + +Usage: + uv run python examples/planning/plan_parallel_demo.py +""" + +from __future__ import annotations + +import asyncio +import sys +import time +import tempfile +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) + +from mcp_cli.planning.context import PlanningContext +from mcp_cli.planning.executor import ( + PlanRunner, + render_plan_dag, + _compute_batches, +) + + +# ── Slow Mock ToolManager ────────────────────────────────────────────────── + + +@dataclass +class FakeToolCallResult: + tool_name: str + success: bool = True + result: Any = None + error: str | None = None + + +class SlowToolManager: + """ToolManager that takes 200ms per tool to demonstrate parallelism.""" + + DELAY = 0.2 # 200ms per tool call + + def __init__(self): + self.call_times: list[tuple[str, float, float]] = [] + + @dataclass + class ToolInfo: + name: str + + def get_all_tools(self): + tools = [ + "fetch", + "parse", + "validate", + "transform", + "aggregate", + "store", + "notify", + ] + return [self.ToolInfo(name=n) for n in tools] + + async def execute_tool(self, tool_name, arguments, namespace=None, timeout=None): + start = time.perf_counter() + await asyncio.sleep(self.DELAY) + end = time.perf_counter() + self.call_times.append((tool_name, start, end)) + return FakeToolCallResult( + tool_name=tool_name, + result=f"{tool_name} result", + ) + + +# ── Demo Plans 
────────────────────────────────────────────────────────────── + +# Pattern 1: Fan-out (1 → many) +FANOUT_PLAN = { + "id": "fanout-demo", + "title": "Fan-Out Pattern (1 root, 5 parallel leaves)", + "steps": [ + { + "index": "1", + "title": "Fetch data source", + "tool": "fetch", + "args": {}, + "depends_on": [], + "result_variable": "data", + }, + { + "index": "2", + "title": "Parse section A", + "tool": "parse", + "args": {"section": "A"}, + "depends_on": ["1"], + "result_variable": "section_a", + }, + { + "index": "3", + "title": "Parse section B", + "tool": "parse", + "args": {"section": "B"}, + "depends_on": ["1"], + "result_variable": "section_b", + }, + { + "index": "4", + "title": "Parse section C", + "tool": "parse", + "args": {"section": "C"}, + "depends_on": ["1"], + "result_variable": "section_c", + }, + { + "index": "5", + "title": "Parse section D", + "tool": "parse", + "args": {"section": "D"}, + "depends_on": ["1"], + "result_variable": "section_d", + }, + { + "index": "6", + "title": "Parse section E", + "tool": "parse", + "args": {"section": "E"}, + "depends_on": ["1"], + "result_variable": "section_e", + }, + ], +} + +# Pattern 2: Diamond (1 → 2 → 1) +DIAMOND_PLAN = { + "id": "diamond-demo", + "title": "Diamond Pattern (fork and join)", + "steps": [ + { + "index": "1", + "title": "Fetch raw data", + "tool": "fetch", + "args": {}, + "depends_on": [], + "result_variable": "raw", + }, + { + "index": "2", + "title": "Validate schema", + "tool": "validate", + "args": {}, + "depends_on": ["1"], + "result_variable": "schema_ok", + }, + { + "index": "3", + "title": "Transform format", + "tool": "transform", + "args": {}, + "depends_on": ["1"], + "result_variable": "transformed", + }, + { + "index": "4", + "title": "Aggregate results", + "tool": "aggregate", + "args": {}, + "depends_on": ["2", "3"], + "result_variable": "final", + }, + ], +} + +# Pattern 3: Wide pipeline (3 independent → 3 independent → 1 join) +WIDE_PIPELINE = { + "id": "wide-pipeline", + 
"title": "Wide Pipeline (3 sources, 3 processors, 1 merge)", + "steps": [ + { + "index": "1", + "title": "Fetch API A", + "tool": "fetch", + "args": {"source": "A"}, + "depends_on": [], + "result_variable": "api_a", + }, + { + "index": "2", + "title": "Fetch API B", + "tool": "fetch", + "args": {"source": "B"}, + "depends_on": [], + "result_variable": "api_b", + }, + { + "index": "3", + "title": "Fetch API C", + "tool": "fetch", + "args": {"source": "C"}, + "depends_on": [], + "result_variable": "api_c", + }, + { + "index": "4", + "title": "Process A", + "tool": "transform", + "args": {"data": "${api_a}"}, + "depends_on": ["1"], + "result_variable": "proc_a", + }, + { + "index": "5", + "title": "Process B", + "tool": "transform", + "args": {"data": "${api_b}"}, + "depends_on": ["2"], + "result_variable": "proc_b", + }, + { + "index": "6", + "title": "Process C", + "tool": "transform", + "args": {"data": "${api_c}"}, + "depends_on": ["3"], + "result_variable": "proc_c", + }, + { + "index": "7", + "title": "Merge all results", + "tool": "aggregate", + "args": {}, + "depends_on": ["4", "5", "6"], + "result_variable": "merged", + }, + ], +} + + +# ── Demo Runner ───────────────────────────────────────────────────────────── + + +def section(title: str) -> None: + print(f"\n{'=' * 60}") + print(f" {title}") + print(f"{'=' * 60}\n") + + +def show_batches(steps): + """Show the computed batch structure.""" + batches = _compute_batches(steps) + for i, batch in enumerate(batches, 1): + indices = [s.get("index", "?") for s in batch] + titles = [s.get("title", "?") for s in batch] + parallel = " (PARALLEL)" if len(batch) > 1 else "" + print(f" Batch {i}{parallel}:") + for idx, title in zip(indices, titles): + print(f" Step {idx}: {title}") + + +def show_timing(call_times: list[tuple[str, float, float]], plan_start: float): + """Visualize execution timing as a timeline.""" + if not call_times: + return + + print(" Timeline (relative to plan start):\n") + for name, start, end in 
sorted(call_times, key=lambda x: x[1]): + offset = start - plan_start + duration = end - start + bar_start = int(offset * 20) # 20 chars per second + bar_len = max(1, int(duration * 20)) + bar = " " * bar_start + "\u2588" * bar_len + print(f" {name:<15} [{bar}] {offset:.2f}s - {offset + duration:.2f}s") + + +async def run_and_time(tm, ctx, plan): + """Execute a plan and return (result, plan_start_time).""" + plan_start = time.perf_counter() + + runner = PlanRunner(ctx, enable_guards=False) + result = await runner.execute_plan(plan, checkpoint=False) + + return result, plan_start + + +async def main() -> None: + print() + print("=" * 60) + print(" Parallel Execution Demo") + print(" Topological batching & concurrent step execution") + print("=" * 60) + + with tempfile.TemporaryDirectory() as tmpdir: + # ── Pattern 1: Fan-Out ── + section("1. Fan-Out Pattern") + print(" DAG:") + print(render_plan_dag(FANOUT_PLAN)) + print() + + print(" Batch structure:") + show_batches(FANOUT_PLAN["steps"]) + print() + + tm1 = SlowToolManager() + ctx1 = PlanningContext(tm1, plans_dir=Path(tmpdir) / "p1") + result, t0 = await run_and_time(tm1, ctx1, FANOUT_PLAN) + + serial_time = len(FANOUT_PLAN["steps"]) * SlowToolManager.DELAY + print( + f" Execution: {result.total_duration:.2f}s " + f"(vs {serial_time:.2f}s serial = " + f"{serial_time / max(0.01, result.total_duration):.1f}x speedup)" + ) + print() + show_timing(tm1.call_times, t0) + + # ── Pattern 2: Diamond ── + section("2. 
Diamond Pattern") + print(" DAG:") + print(render_plan_dag(DIAMOND_PLAN)) + print() + + print(" Batch structure:") + show_batches(DIAMOND_PLAN["steps"]) + print() + + tm2 = SlowToolManager() + ctx2 = PlanningContext(tm2, plans_dir=Path(tmpdir) / "p2") + result, t0 = await run_and_time(tm2, ctx2, DIAMOND_PLAN) + + serial_time = len(DIAMOND_PLAN["steps"]) * SlowToolManager.DELAY + print( + f" Execution: {result.total_duration:.2f}s " + f"(vs {serial_time:.2f}s serial = " + f"{serial_time / max(0.01, result.total_duration):.1f}x speedup)" + ) + print() + show_timing(tm2.call_times, t0) + + # ── Pattern 3: Wide Pipeline ── + section("3. Wide Pipeline Pattern") + print(" DAG:") + print(render_plan_dag(WIDE_PIPELINE)) + print() + + print(" Batch structure:") + show_batches(WIDE_PIPELINE["steps"]) + print() + + tm3 = SlowToolManager() + ctx3 = PlanningContext(tm3, plans_dir=Path(tmpdir) / "p3") + result, t0 = await run_and_time(tm3, ctx3, WIDE_PIPELINE) + + serial_time = len(WIDE_PIPELINE["steps"]) * SlowToolManager.DELAY + print( + f" Execution: {result.total_duration:.2f}s " + f"(vs {serial_time:.2f}s serial = " + f"{serial_time / max(0.01, result.total_duration):.1f}x speedup)" + ) + print() + show_timing(tm3.call_times, t0) + + # ── Summary ── + section("Summary") + print(" Pattern | Steps | Batches | Serial | Parallel | Speedup") + print(" " + "-" * 56) + + for name, plan, tm_obj in [ + ("Fan-Out", FANOUT_PLAN, tm1), + ("Diamond", DIAMOND_PLAN, tm2), + ("Wide Pipeline", WIDE_PIPELINE, tm3), + ]: + n_steps = len(plan["steps"]) + batches = _compute_batches(plan["steps"]) + serial = n_steps * SlowToolManager.DELAY + # Total wall time from first start to last end + if tm_obj.call_times: + all_starts = [s for _, s, _ in tm_obj.call_times] + all_ends = [e for _, _, e in tm_obj.call_times] + wall = max(all_ends) - min(all_starts) + else: + wall = 0 + speedup = serial / max(0.01, wall) + print( + f" {name:<15} | {n_steps:>5} | {len(batches):>7} | {serial:>6.2f}s | 
{wall:>7.2f}s | {speedup:>5.1f}x" + ) + + print(f"\n{'=' * 60}") + print(" Demo complete!") + print(f"{'=' * 60}\n") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/pyproject.toml b/pyproject.toml index 383990cf..0e1399bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ keywords = ["llm", "openai", "claude", "mcp", "cli"] license = {text = "MIT"} dependencies = [ "asyncio>=3.4.3", + "chuk-ai-planner>=0.2", "chuk-ai-session-manager>=0.11", "chuk-llm>=0.17.1", "chuk-mcp-client-oauth>=0.3.5", @@ -113,6 +114,7 @@ branch = true [tool.coverage.report] show_missing = true +fail_under = 60 exclude_lines = [ "pragma: no cover", "if TYPE_CHECKING:", diff --git a/roadmap.md b/roadmap.md index 45e5f027..f2666251 100644 --- a/roadmap.md +++ b/roadmap.md @@ -390,52 +390,160 @@ OS-style virtual memory for conversation context management, powered by `chuk-ai --- -## Tier 6: Execution Graphs & Plans +## Tier 6: Execution Graphs & Plans ✅ COMPLETE > **Shift:** conversation → reasoning → tools **becomes** intent → plan → execution → memory → replay +> +> **Spec:** `specs/6.0-planner-integration.md` +> **Integration:** `chuk-ai-planner>=0.2` — graph-based plan DSL, executor, LLM plan generation -### 6.1 First-Class Plans +### 6.0 Planner Foundation Wiring ✅ -Today the AI reasons from scratch each time. Plans make workflows reproducible, shareable, schedulable, and testable — Terraform for agents. +Bridge `chuk-ai-planner` to mcp-cli's MCP tool execution layer. 
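
In rough shape, the bridge is a protocol adapter: the planner hands a request object to a backend, and the mcp-cli backend fulfils it by delegating to `ToolManager.execute_tool()`. A minimal runnable sketch of that shape, assuming illustrative field names (`tool_name`, `args`, `success`, `result`, `error`, `duration`) and a stubbed ToolManager — the shipped classes differ in detail:

```python
from __future__ import annotations

import asyncio
import time
from dataclasses import dataclass, field
from typing import Any, Protocol


@dataclass
class ToolExecutionRequest:  # planner-side request (illustrative fields)
    tool_name: str
    args: dict[str, Any] = field(default_factory=dict)


@dataclass
class ToolExecutionResult:  # planner-side result (illustrative fields)
    success: bool
    result: Any = None
    error: str | None = None
    duration: float = 0.0


class ToolExecutionBackend(Protocol):
    async def execute_tool(self, request: ToolExecutionRequest) -> ToolExecutionResult: ...


class McpToolBackend:
    """Adapter: satisfies the planner protocol by delegating to a ToolManager."""

    def __init__(self, tool_manager: Any) -> None:
        self._tm = tool_manager

    async def execute_tool(self, request: ToolExecutionRequest) -> ToolExecutionResult:
        start = time.perf_counter()
        try:
            raw = await self._tm.execute_tool(request.tool_name, request.args)
            return ToolExecutionResult(
                success=True, result=raw, duration=time.perf_counter() - start
            )
        except Exception as exc:  # the planner sees errors as data, not exceptions
            return ToolExecutionResult(
                success=False, error=str(exc), duration=time.perf_counter() - start
            )


class FakeToolManager:
    """Stand-in for mcp-cli's ToolManager so the sketch runs without servers."""

    async def execute_tool(self, name: str, args: dict[str, Any]) -> str:
        return f"{name} ok"


async def demo() -> None:
    backend = McpToolBackend(FakeToolManager())
    res = await backend.execute_tool(ToolExecutionRequest("read_file", {"path": "x"}))
    print(res.success, res.result)  # True read_file ok


asyncio.run(demo())
```

The key design point survives the simplification: the planner never learns about MCP; it only sees the protocol, so the same executor can drive Python-function backends and MCP-server backends interchangeably.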
+ +**Files:** +- `src/mcp_cli/planning/backends.py` — `McpToolBackend` (implements `ToolExecutionBackend` protocol, wraps `ToolManager.execute_tool()`) +- `src/mcp_cli/planning/context.py` — `PlanningContext` (state container: graph store, tool manager, plan registry, tool catalog) +- `src/mcp_cli/planning/executor.py` — `PlanRunner` (orchestrates plan execution with guard integration, dry-run, checkpointing) +- `src/mcp_cli/planning/__init__.py` — Public API + +**Key integration:** chuk-ai-planner's `ToolProcessorBackend` calls `CTP.process()` for registered Python functions. `McpToolBackend` instead calls `ToolManager.execute_tool()` for real MCP server tools — same protocol interface, different execution path. + +### 6.1 Plan Commands ✅ ``` -mcp plan create "plan a coastal walk tomorrow" -mcp plan inspect 42 -mcp plan run 42 -mcp plan replay 42 --dry-run +mcp plan create "add auth to this API" +mcp plan list +mcp plan show +mcp plan run +mcp plan run --dry-run +mcp plan delete +mcp plan resume ``` +**Files:** +- `src/mcp_cli/commands/plan/plan.py` — `PlanCommand` (unified command, supports CHAT + CLI + INTERACTIVE) +- `src/mcp_cli/config/enums.py` — `PlanAction` enum + +**Chat mode:** `/plan create "description"`, `/plan list`, `/plan run ` + - Plan = persistent, inspectable execution graph (DAG of tool calls + decisions) -- Plans are serialized (YAML/JSON) and version-controlled -- `replay --dry-run` shows what would execute without side effects -- Plans can be parameterized: `mcp plan run 42 --date 2026-03-01` +- Plans are serialized as JSON at `~/.mcp-cli/plans/` +- `--dry-run` shows what would execute without side effects +- Plans can be parameterized: `mcp plan run --var date=2026-03-01` + +### 6.2 Plan Execution with Guards ✅ + +Plan execution respects mcp-cli's existing guard infrastructure: -### 6.2 Simulation / Dry-Run Mode +- Pre-execution: `ToolStateManager.check_all_guards()` — budget, runaway, per-tool limits +- Post-execution: 
`ToolStateManager.record_tool_call()` — tracking + value binding +- Step error handling: retry (via `PlanStep.max_retries`), fallback, or pause for user input +- Budget shared with conversation — plan execution counts against same limits +- 55 tests covering guard integration, PlanRegistry round-trips, DAG visualization + +### 6.3 Execution Checkpointing & Resume ✅ + +- After each step: persist state to `~/.mcp-cli/plans/{id}_state.json` +- `mcp plan resume ` — loads checkpoint, skips completed steps, continues +- Tracks: completed steps, variable bindings, failed steps, timing + +### 6.4 Simulation / Dry-Run Mode ✅ Critical for trust. Show planned tool calls without executing them. ``` -mcp run "delete inactive users" --simulate +mcp plan run --dry-run ``` -- Traces the full execution path -- Shows tool calls that *would* happen, with estimated arguments +- Walks plan in topological order +- Resolves `${var}` references +- Displays each step: tool name, resolved arguments, dependencies +- Reports estimated tool call count - No side effects — safe to run in production -- Foundation for plan creation: `--simulate` output becomes a plan -### 6.3 Deterministic Mode +### 6.5 Parallel Step Execution ✅ + +Independent plan steps execute concurrently via topological batch ordering: + +- `_compute_batches()` uses Kahn's BFS topological sort to group steps into parallel batches +- Steps within a batch run concurrently via `asyncio.gather()` with semaphore-controlled concurrency +- Batches execute sequentially to respect dependency ordering +- `max_concurrency` parameter (default: 4) limits concurrent tool calls +- Diamond DAG (1 → 2,3,4 → 5) executes with 3 batches: [1], [2,3,4], [5] +- Variable resolution: `${var}`, `${var.field.subfield}`, template strings — type-preserving for single refs + +### 6.6 DAG Visualization ✅ + +Terminal visualization of plan execution: + +- Terminal: ASCII DAG rendering with step status indicators (○ pending, ◉ running, ● completed, ✗ failed) 
+- `render_plan_dag()` function for terminal display +- Parallel step indicator (∥) marks steps that run concurrently within a batch +- Browser: MCP App panel with D3 force-directed graph, live WebSocket updates (Future) -Enterprise reliability — bounded, predictable execution. +### 6.7 Re-planning ✅ + +Adaptive re-planning when execution hits problems (opt-in via `enable_replan=True`): + +- On step failure: injects failure context (completed steps, error, remaining steps, variables) into PlanAgent +- PlanAgent generates a revised plan for the remaining work +- Revised plan executes with the current variable context (no recursive re-planning) +- Results merged: completed steps from original + steps from revised plan +- `max_replans` parameter (default: 2) limits re-planning attempts +- `PlanExecutionResult.replanned` flag indicates whether re-planning occurred +- Disabled by default — failure just fails without LLM involvement + +### 6.8 Model-Driven Planning (Plan as a Tool) ✅ + +The model can autonomously create and execute plans during conversation — no `/plan` command required. + +When the model determines a task needs multi-step orchestration, it calls an internal `plan` tool to decompose the task into a structured execution graph, then executes it — all within the normal chat flow. + +**Internal tools (intercepted before MCP routing, like VM tools):** + +| Tool | Purpose | +|------|---------| +| `plan_create` | Model describes a goal → PlanAgent generates a plan DAG → returns plan ID + step summary | +| `plan_execute` | Model passes plan ID → PlanRunner executes → returns results + variables | +| `plan_create_and_execute` | Combined: generate + execute in one call (common case) | + +**How it works:** ``` -mcp run "book cheapest train" --deterministic +User: "What's the weather like for sailing in Raglan tomorrow?" + +Model (internally): This needs geocoding then weather lookup. 
+ → calls plan_create_and_execute(goal="Get weather forecast for Raglan, NZ") + → PlanAgent generates: [geocode Raglan] → [get weather for coords] + → PlanRunner executes both steps via MCP servers + → Results flow back to model as tool result + +Model: "Tomorrow in Raglan: 18°C, light winds from the SW at 12 km/h, + partly cloudy. Good conditions for sailing." ``` -- Fixed tool selection (no exploration) -- Bounded reasoning (max turns, max tokens) -- Structured outputs with schema validation -- Configurable retry policies -- Reproducible given same inputs +**Key design decisions:** + +- **Intercepted like VM tools:** `plan_create`, `plan_execute`, `plan_create_and_execute` are caught in `tool_processor.py` before MCP guard routing, executed locally via PlanRunner +- **Model decides when to plan:** The system prompt describes the planning tools; the model calls them when it determines multi-step orchestration is more effective than sequential tool calls +- **Plans are ephemeral by default:** Created during conversation, not persisted unless the model or user explicitly saves them. 
Reduces clutter vs `/plan create` +- **Shares guard budget:** Plan tool calls count against the same budget as regular tool calls +- **Display integration:** Plan execution renders with the same `StreamingDisplayManager` callbacks as regular tool calls — the user sees each step executing in real time +- **Variable flow:** Plan results are returned as the tool result, so the model can reference them naturally in its response +- **Opt-in via system prompt:** The planning tools only appear when `--enable-plan-tools` is set (or equivalent config), so the model doesn't attempt planning on simple tasks + +**Files:** +- `src/mcp_cli/chat/tool_processor.py` — Intercept `plan_create` / `plan_execute` / `plan_create_and_execute` before MCP routing +- `src/mcp_cli/planning/tools.py` — Tool definitions (OpenAI function format) and execution handlers +- `src/mcp_cli/chat/system_prompt.py` — Inject planning tool descriptions when enabled +- `src/mcp_cli/config/defaults.py` — `DEFAULT_ENABLE_PLAN_TOOLS = False` + +**Why this matters:** + +Today: User types `/plan create "get weather for Raglan"` → plan generated → user types `/plan run ` → result shown. Three interactions. + +With 6.8: User asks a question → model decides it needs a plan → creates and executes it → answers. One interaction. The model becomes a self-orchestrating agent when the task demands it, and a simple chatbot when it doesn't. 
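
The interception described above can be sketched as a name-based dispatch sitting in front of the normal MCP routing path. Everything here except the three tool names is illustrative — the real `tool_processor.py` wiring, guard checks, and handler signatures differ:

```python
import asyncio
from typing import Any

# The three internal planning tools named in the table above.
PLAN_TOOLS = {"plan_create", "plan_execute", "plan_create_and_execute"}


async def run_plan_locally(name: str, args: dict[str, Any]) -> str:
    # Placeholder for the PlanAgent + PlanRunner path; returns the plan result
    # payload that flows back to the model as an ordinary tool result.
    return f"[local plan tool] {name}({args.get('goal', '')})"


async def route_mcp(name: str, args: dict[str, Any]) -> str:
    # Placeholder for the normal guard-checked MCP execution path.
    return f"[mcp] {name}"


async def dispatch_tool_call(name: str, args: dict[str, Any]) -> str:
    """Intercept plan tools before they reach MCP guard routing."""
    if name in PLAN_TOOLS:
        return await run_plan_locally(name, args)
    return await route_mcp(name, args)


async def demo() -> None:
    print(await dispatch_tool_call("plan_create_and_execute", {"goal": "weather"}))
    print(await dispatch_tool_call("read_file", {"path": "a.txt"}))


asyncio.run(demo())
```

This mirrors how the VM tools are handled: the model emits a perfectly ordinary tool call, and only the processor knows that some names execute locally instead of crossing to an MCP server.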
--- @@ -925,7 +1033,8 @@ mcp remote logs --follow | **5** | Production hardening | Observable, auditable | ✅ Complete | | **VM** | AI Virtual Memory | OS-style context management | ✅ Complete (Experimental) | | **Review** | Code review fixes | Silent exceptions, dead code, test gaps | ✅ Complete | -| **6** | Plans & execution graphs | Reproducible workflows | High | +| **6** | Plans & execution graphs | Reproducible workflows | ✅ Complete (6.0–6.7) | +| **6.8** | Model-driven planning | Model creates & executes plans as tools | High | | **7** | Observability & traces | Debugger for AI behavior | High | | **8** | Memory scopes | Long-running assistants | High | | **9** | Skills & capabilities | Portable behaviour layer | High | @@ -937,7 +1046,7 @@ mcp remote logs --follow These change the category of the tool from **chat interface** to **agent operating system**: -1. **Plans** (Tier 6) — reproducible, inspectable execution +1. **Plans** (Tier 6) — reproducible, inspectable execution; model-driven planning (6.8) makes the model a self-orchestrating agent 2. **Traces** (Tier 7) — explainable AI operations 3. **Skills** (Tier 9) — portable, reusable behaviour (the npm for agents) 4. **Scheduling** (Tier 10) — autonomous background agents diff --git a/specs/6.0-planner-integration.md b/specs/6.0-planner-integration.md new file mode 100644 index 00000000..1b7a94dd --- /dev/null +++ b/specs/6.0-planner-integration.md @@ -0,0 +1,258 @@ +# Spec 6.0: Planner Integration ✅ COMPLETE + +> **Tier 6 — Execution Graphs & Plans** +> Shift: conversation → reasoning → tools **becomes** intent → plan → execution → memory → replay + +## Overview + +Integrate `chuk-ai-planner` into mcp-cli so that users can generate, inspect, persist, and execute structured plans that decompose complex tasks into dependency-aware tool call graphs. Plans make workflows reproducible, shareable, and inspectable — Terraform for agents. 
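
To make "dependency-aware" concrete, here is a hedged sketch of level-by-level topological batching (Kahn's algorithm) — the idea the roadmap attributes to the `_compute_batches()` helper. The real helper's signature and return shape may differ; step dicts here carry only the fields the algorithm needs:

```python
from collections import deque


def compute_batches(steps: list[dict]) -> list[list[dict]]:
    """Group steps into batches: each batch depends only on earlier batches."""
    by_index = {s["index"]: s for s in steps}
    indegree = {i: len(s.get("depends_on", [])) for i, s in by_index.items()}
    dependents: dict[str, list[str]] = {i: [] for i in by_index}
    for s in steps:
        for dep in s.get("depends_on", []):
            dependents[dep].append(s["index"])

    ready = deque(i for i, d in indegree.items() if d == 0)
    batches: list[list[dict]] = []
    while ready:
        batch = list(ready)  # everything currently unblocked runs together
        ready.clear()
        batches.append([by_index[i] for i in batch])
        for i in batch:
            for child in dependents[i]:
                indegree[child] -= 1
                if indegree[child] == 0:
                    ready.append(child)
    return batches


# Diamond DAG 1 -> (2, 3, 4) -> 5 yields three batches: [1], [2, 3, 4], [5]
diamond = [
    {"index": "1", "depends_on": []},
    {"index": "2", "depends_on": ["1"]},
    {"index": "3", "depends_on": ["1"]},
    {"index": "4", "depends_on": ["1"]},
    {"index": "5", "depends_on": ["2", "3", "4"]},
]
print([[s["index"] for s in b] for b in compute_batches(diamond)])
# → [['1'], ['2', '3', '4'], ['5']]
```

Steps inside a batch can then be handed to `asyncio.gather()` (with a semaphore capping concurrency), while batches execute sequentially to respect dependency ordering.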
+ +## Motivation + +Today mcp-cli's conversation loop reasons from scratch every turn. The model decides which tools to call reactively. This works for simple tasks but fails for complex multi-step workflows: + +- **No reproducibility** — same prompt may produce different tool sequences +- **No inspection** — can't review what the agent will do before it does it +- **No resumability** — interrupted workflows start over +- **No parallelism awareness** — independent steps execute sequentially because the model emits them one turn at a time + +Plans solve all four problems by making the execution graph explicit, persistent, and executable. + +## Architecture + +### Component Relationships + +``` +User + │ + ├── /plan create "task description" + │ │ + │ ├── PlanAgent (LLM generates structured JSON plan) + │ │ └── validates tool names against ToolManager catalog + │ │ + │ └── UniversalPlan (DSL builds graph in GraphStore) + │ └── PlanRegistry (persists to ~/.mcp-cli/plans/) + │ + ├── /plan run + │ │ + │ └── UniversalExecutor + │ ├── topological sort (respects step dependencies) + │ ├── variable resolution (${var} syntax) + │ └── McpToolBackend (bridges to ToolManager) + │ └── ToolManager.execute_tool() → MCP servers + │ + └── /plan list | show | delete | run --dry-run +``` + +### The Bridge: McpToolBackend + +The critical integration point. chuk-ai-planner defines a `ToolExecutionBackend` protocol: + +```python +class ToolExecutionBackend(Protocol): + async def execute_tool(self, request: ToolExecutionRequest) -> ToolExecutionResult +``` + +The existing `ToolProcessorBackend` calls `CTP.process()` with fake OpenAI payloads. This works for registered Python functions but not for mcp-cli's MCP server tools, which are accessed via `ToolManager.execute_tool()`. 
+ +`McpToolBackend` bridges this gap: + +``` +ToolExecutionRequest (planner) + → McpToolBackend.execute_tool() + → ToolManager.execute_tool(name, args) + → StreamManager → MCP server + → ToolExecutionResult (planner) +``` + +### File Layout + +``` +src/mcp_cli/planning/ + __init__.py # Public API + backends.py # McpToolBackend — bridges planner to ToolManager + context.py # PlanningContext — state container for plan operations + executor.py # PlanRunner — orchestrates plan execution with guards + registry.py # Thin wrapper around chuk-ai-planner's PlanRegistry + +src/mcp_cli/commands/plan/ + __init__.py # Exports PlanCommand + plan.py # PlanCommand — /plan slash command + CLI +``` + +## Implementation + +### 6.0.1 McpToolBackend (`planning/backends.py`) + +Implements `ToolExecutionBackend` protocol using mcp-cli's `ToolManager`: + +- Receives `ToolExecutionRequest` from planner +- Resolves tool name (handles namespaced tools like `server__tool`) +- Calls `ToolManager.execute_tool(tool_name, args)` +- Wraps `ToolCallResult` → `ToolExecutionResult` +- Measures execution duration +- Catches exceptions → returns error in `ToolExecutionResult` + +### 6.0.2 PlanningContext (`planning/context.py`) + +State container holding everything needed for plan operations: + +- `graph_store: InMemoryGraphStore` — the plan graph +- `tool_manager: ToolManager` — for tool execution and catalog +- `plan_registry: PlanRegistry` — for persistence at `~/.mcp-cli/plans/` +- `get_tool_catalog() -> list[dict]` — returns available tools for LLM plan generation prompts + +### 6.0.3 PlanRunner (`planning/executor.py`) + +Orchestrates plan execution with mcp-cli's guard infrastructure: + +- Creates `UniversalExecutor` with `McpToolBackend` +- Before each step: checks `ToolStateManager.check_all_guards()` +- After each step: calls `ToolStateManager.record_tool_call()` +- Step error handling: retry, fallback, or pause for user input +- Dry-run mode: walks plan without executing, displays resolved 
args +- Progress callbacks for terminal/dashboard display + +### 6.0.4 PlanCommand (`commands/plan/plan.py`) + +Unified command supporting chat, CLI, and interactive modes: + +| Subcommand | Description | +|------------|-------------| +| `create "description"` | Generate plan from natural language | +| `list` | List saved plans | +| `show ` | Display plan structure | +| `run ` | Execute a saved plan | +| `run --dry-run` | Trace without executing | +| `run --var key=val` | Parameterized execution | +| `delete ` | Remove from registry | +| `resume ` | Continue interrupted plan | + +Default action (no subcommand): `list`. + +### 6.0.5 PlanAction Enum (`config/enums.py`) + +```python +class PlanAction(str, Enum): + CREATE = "create" + LIST = "list" + SHOW = "show" + RUN = "run" + DELETE = "delete" + RESUME = "resume" +``` + +## Plan Format + +Plans are persisted as JSON at `~/.mcp-cli/plans/{plan_id}.json`: + +```json +{ + "id": "uuid", + "title": "Add authentication to REST API", + "description": "Generated from user request", + "created_at": "2026-02-25T10:00:00Z", + "tags": [], + "variables": {}, + "steps": [ + { + "id": "uuid", + "index": "1", + "title": "Read existing app code", + "tool_calls": [ + {"id": "uuid", "name": "read_file", "args": {"path": "src/app.py"}} + ], + "depends_on": [], + "result_variable": "app_code" + }, + { + "id": "uuid", + "index": "2", + "title": "Create auth module", + "tool_calls": [ + {"id": "uuid", "name": "write_file", "args": {"path": "src/auth.py", "content": "${app_code}"}} + ], + "depends_on": ["1"], + "result_variable": "auth_module" + } + ] +} +``` + +## Execution Checkpointing + +After each step completes, persist execution state alongside the plan: + +`~/.mcp-cli/plans/{plan_id}_state.json`: + +```json +{ + "plan_id": "uuid", + "status": "running", + "completed_steps": ["1"], + "variables": {"app_code": "..."}, + "failed_steps": {}, + "started_at": "2026-02-25T10:00:00Z" +} +``` + +`/plan resume ` loads this state, skips 
completed steps, and continues. + +## Guard Integration + +Plan execution respects existing guard infrastructure: + +1. **Pre-execution**: `ToolStateManager.check_all_guards(tool_name, args)` — budget, runaway, per-tool limits +2. **Execution**: `McpToolBackend.execute_tool(request)` — actual MCP call +3. **Post-execution**: `ToolStateManager.record_tool_call(tool_name)` — tracking +4. **Value binding**: Result stored as `$vN` (guard system) AND `${variable}` (plan context) + +Budget is shared with the conversation — plan execution counts against the same limits. + +## Dry-Run Mode + +`/plan run --dry-run` or `mcp plan run --simulate`: + +- Walks the plan in topological order +- Resolves `${var}` references (using defaults or previous step outputs) +- Displays each step: tool name, resolved arguments, dependencies +- Reports estimated tool call count +- No side effects — safe to run in production + +## Terminal Display + +Plan display in the terminal: + +``` +Plan: Add authentication to REST API (4 steps) + + 1. Read existing app code [read_file] + 2. Analyze dependencies [search_code] ← after: 1 + 3. Create auth module [write_file] ← after: 1, 2 + 4. Run tests [run_command] ← after: 3 + +Variables: app_code (step 1), deps (step 2), auth_module (step 3) + +Execute? 
[y/n/edit] +``` + +## Dependencies + +- `chuk-ai-planner>=0.2` added to `pyproject.toml` +- Uses: `UniversalPlan`, `UniversalExecutor`, `PlanAgent`, `GraphPlanAgent`, `PlanRegistry` +- Uses: `InMemoryGraphStore` (persistent store is a future tier) + +## Testing + +- `tests/planning/test_backends.py` — McpToolBackend with mock ToolManager +- `tests/planning/test_executor.py` — PlanRunner with mock backend +- `tests/planning/test_context.py` — PlanningContext initialization +- `tests/commands/test_plan_command.py` — PlanCommand subcommand dispatch + +## Future Work (Not in This Tier) + +- Parallel step execution (batch steps with no dependencies) +- DAG visualization (terminal ASCII + MCP App browser panel) +- Re-planning on failure (LLM revises remaining steps) +- Plan parameterization (`--var key=value`) +- Integration with CTP's `GreedyDagScheduler` for deadline-aware scheduling diff --git a/src/mcp_cli/adapters/interactive.py b/src/mcp_cli/adapters/interactive.py index eb98ed05..c529b308 100644 --- a/src/mcp_cli/adapters/interactive.py +++ b/src/mcp_cli/adapters/interactive.py @@ -243,13 +243,13 @@ def get_completions(partial_line: str, cursor_pos: int) -> list[str]: option_name, partial_value = current_arg.split("=", 1) param_name = option_name[2:] - param = next( + found_param = next( (p for p in command.parameters if p.name == param_name), - None, # type: ignore[arg-type] + None, ) - if param and param.choices: - for choice in param.choices: + if found_param and found_param.choices: + for choice in found_param.choices: if str(choice).startswith(partial_value): completions.append(f"{option_name}={choice}") diff --git a/src/mcp_cli/chat/chat_context.py b/src/mcp_cli/chat/chat_context.py index 3d387c43..351858bd 100644 --- a/src/mcp_cli/chat/chat_context.py +++ b/src/mcp_cli/chat/chat_context.py @@ -62,6 +62,7 @@ def __init__( vm_mode: str = "passive", vm_budget: int = 128_000, health_interval: int = 0, + enable_plan_tools: bool = False, ): """ Create chat 
context with required managers. @@ -78,6 +79,7 @@ def __init__( vm_mode: VM mode - strict, relaxed, or passive vm_budget: Max tokens for VM L0 working set (context window budget) health_interval: Background health check interval in seconds (0 = disabled) + enable_plan_tools: Enable plan_create/plan_execute as LLM-callable tools """ self.tool_manager = tool_manager self.model_manager = model_manager @@ -92,6 +94,7 @@ def __init__( self._vm_mode = vm_mode self._vm_budget = vm_budget self._health_interval = health_interval + self._enable_plan_tools = enable_plan_tools # Core session manager - always required self.session: SessionManager = SessionManager(session_id=self.session_id) @@ -162,6 +165,7 @@ def create( vm_mode: str = "passive", vm_budget: int = 128_000, health_interval: int = 0, + enable_plan_tools: bool = False, ) -> "ChatContext": """ Factory method for convenient creation. @@ -182,6 +186,7 @@ def create( vm_mode: VM mode - strict, relaxed, or passive vm_budget: Max tokens for VM L0 working set (context window budget) health_interval: Background health check interval in seconds (0 = disabled) + enable_plan_tools: Enable plan_create/plan_execute as LLM-callable tools Returns: Configured ChatContext instance @@ -214,6 +219,7 @@ def create( vm_mode=vm_mode, vm_budget=vm_budget, health_interval=health_interval, + enable_plan_tools=enable_plan_tools, ) # ── Properties ──────────────────────────────────────────────────────── diff --git a/src/mcp_cli/chat/chat_handler.py b/src/mcp_cli/chat/chat_handler.py index 3832717a..3ac966ff 100644 --- a/src/mcp_cli/chat/chat_handler.py +++ b/src/mcp_cli/chat/chat_handler.py @@ -48,6 +48,7 @@ async def handle_chat_mode( vm_mode: str = "passive", vm_budget: int = 128_000, health_interval: int = 0, + enable_plan_tools: bool = False, ) -> bool: """ Launch the interactive chat loop with streaming support. 
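
The `${var}` and `${var.field}` references that appear in plan step arguments (e.g. `"content": "${app_code}"` in the plan format above) resolve against earlier step outputs. As a rough illustration only — this is not chuk-ai-planner's actual resolver, and `resolve_ref` is a hypothetical name — the type-preserving rule for single references versus template strings can be sketched as:

```python
import re

_REF = re.compile(r"\$\{([\w.]+)\}")


def resolve_ref(value, bindings):
    """Illustrative sketch of ${var} / ${var.field} resolution (not the real API).

    A string that is exactly one reference returns the bound object
    unchanged (type-preserving); template strings interpolate via str().
    """

    def lookup(path):
        obj = bindings
        for part in path.split("."):
            obj = obj[part]  # nested field access for ${var.field}
        return obj

    if not isinstance(value, str):
        return value
    whole = _REF.fullmatch(value)
    if whole:
        return lookup(whole.group(1))  # single ref: keep original type
    return _REF.sub(lambda m: str(lookup(m.group(1))), value)


bindings = {"api": {"host": "example.com", "port": 443}}
print(resolve_ref("${api.port}", bindings))                # 443 (int, not "443")
print(resolve_ref("https://${api.host}/users", bindings))  # https://example.com/users
```

The distinction matters because a step result may be a dict or list; stringifying it on a single-ref lookup would force every downstream tool argument to be text.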
@@ -106,6 +107,7 @@ def on_progress(msg: str) -> None: vm_mode=vm_mode, vm_budget=vm_budget, health_interval=health_interval, + enable_plan_tools=enable_plan_tools, ) if not await ctx.initialize(on_progress=on_progress): diff --git a/src/mcp_cli/chat/conversation.py b/src/mcp_cli/chat/conversation.py index 007d9087..8510e84c 100644 --- a/src/mcp_cli/chat/conversation.py +++ b/src/mcp_cli/chat/conversation.py @@ -210,6 +210,10 @@ async def process_conversation(self, max_turns: int = 100): if not getattr(self.context, "openai_tools", None): await self._load_tools() + # Inject internal tools (plan, VM, memory) even when + # openai_tools were pre-loaded by ChatContext. + await self._inject_internal_tools() + # REMOVED: Sanitization logic - now handled by universal tool compatibility # The OpenAI client automatically handles tool name sanitization and restoration @@ -752,30 +756,87 @@ async def _load_tools(self): self.context.openai_tools = [] self.context.tool_name_mapping = {} + # Inject internal tools (plan, VM, memory) after loading MCP tools + await self._inject_internal_tools() + + async def _inject_internal_tools(self): + """Inject internal (non-MCP) tools into the tool list. + + Idempotent — checks for existing tool names before adding. + Called both from _load_tools() and from the main loop to handle + the case where openai_tools were pre-loaded by ChatContext. 
+ """ + tools = getattr(self.context, "openai_tools", None) + if tools is None: + return + + # Build set of existing tool names for dedup + existing = { + t.get("function", {}).get("name", "") for t in tools if isinstance(t, dict) + } + # Inject VM tools for strict/relaxed modes vm = getattr(getattr(self.context, "session", None), "vm", None) vm_mode = getattr(getattr(vm, "mode", None), "value", "passive") - if vm and vm_mode != "passive": + if vm and vm_mode != "passive" and "page_fault" not in existing: try: from chuk_ai_session_manager.memory.vm_prompts import ( get_vm_tools_as_dicts, ) vm_tools = get_vm_tools_as_dicts(include_search=True) - self.context.openai_tools.extend(vm_tools) - logger.info(f"Injected {len(vm_tools)} VM tools for {vm_mode} mode") + new_vm = [ + t + for t in vm_tools + if t.get("function", {}).get("name", "") not in existing + ] + if new_vm: + self.context.openai_tools.extend(new_vm) + existing.update( + t.get("function", {}).get("name", "") for t in new_vm + ) + logger.info(f"Injected {len(new_vm)} VM tools for {vm_mode} mode") except Exception as exc: logger.warning(f"Could not load VM tools: {exc}") + # Inject plan tools when enabled + if ( + getattr(self.context, "_enable_plan_tools", False) + and "plan_create_and_execute" not in existing + ): + try: + from mcp_cli.planning.tools import get_plan_tools_as_dicts + + plan_tools = get_plan_tools_as_dicts() + new_plan = [ + t + for t in plan_tools + if t.get("function", {}).get("name", "") not in existing + ] + if new_plan: + self.context.openai_tools.extend(new_plan) + existing.update( + t.get("function", {}).get("name", "") for t in new_plan + ) + logger.info(f"Injected {len(new_plan)} plan tools") + except Exception as exc: + logger.warning(f"Could not load plan tools: {exc}") + # Inject persistent memory scope tools store = getattr(self.context, "memory_store", None) - if store: + if store and "memory_store_page" not in existing: try: from mcp_cli.memory.tools import 
get_memory_tools_as_dicts memory_tools = get_memory_tools_as_dicts() - self.context.openai_tools.extend(memory_tools) - logger.info(f"Injected {len(memory_tools)} memory scope tools") + new_mem = [ + t + for t in memory_tools + if t.get("function", {}).get("name", "") not in existing + ] + if new_mem: + self.context.openai_tools.extend(new_mem) + logger.info(f"Injected {len(new_mem)} memory scope tools") except Exception as exc: logger.warning(f"Could not load memory tools: {exc}") diff --git a/src/mcp_cli/chat/models.py b/src/mcp_cli/chat/models.py index 8a2325bf..5494ee71 100644 --- a/src/mcp_cli/chat/models.py +++ b/src/mcp_cli/chat/models.py @@ -318,6 +318,9 @@ class ToolProcessorContext(Protocol): # Optional processor back-reference (set by ToolProcessor) tool_processor: Any # Will be set to ToolProcessor instance + # Optional planning context (lazy-created by _handle_plan_tool) + _planning_context: Any + def get_display_name_for_tool(self, tool_name: str) -> str: """Get display name for a tool (may be namespaced).""" ... 
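
The dedup pattern used by `_inject_internal_tools` above — collect existing function names into a set, filter candidates, extend — can be exercised in isolation. `inject_tools` below is a hypothetical standalone helper, not mcp-cli API:

```python
def inject_tools(openai_tools: list[dict], candidates: list[dict]) -> int:
    """Idempotently append candidate tool defs whose names aren't already present.

    Standalone sketch of the dedup logic; returns how many tools were added.
    """
    existing = {
        t.get("function", {}).get("name", "")
        for t in openai_tools
        if isinstance(t, dict)
    }
    new = [
        t
        for t in candidates
        if t.get("function", {}).get("name", "") not in existing
    ]
    openai_tools.extend(new)
    return len(new)


tools = [{"function": {"name": "read_file"}}]
plan_tools = [
    {"function": {"name": "read_file"}},                # duplicate — skipped
    {"function": {"name": "plan_create_and_execute"}},  # new — appended
]
print(inject_tools(tools, plan_tools))  # 1
print(inject_tools(tools, plan_tools))  # 0 (idempotent on repeat calls)
```

Idempotency is the point: the injection runs both from `_load_tools()` and from the main conversation loop, so a second pass must not duplicate entries in the tool list sent to the model.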
diff --git a/src/mcp_cli/chat/tool_processor.py b/src/mcp_cli/chat/tool_processor.py index 735c5c4b..cb1ff8eb 100644 --- a/src/mcp_cli/chat/tool_processor.py +++ b/src/mcp_cli/chat/tool_processor.py @@ -34,6 +34,7 @@ from chuk_tool_processor.discovery import get_search_engine from mcp_cli.llm.content_models import ContentBlockType from mcp_cli.memory.tools import _MEMORY_TOOL_NAMES +from mcp_cli.planning.tools import _PLAN_TOOL_NAMES from mcp_cli.utils.preferences import get_preference_manager if TYPE_CHECKING: @@ -45,6 +46,7 @@ _VM_TOOL_NAMES = frozenset({"page_fault", "search_pages"}) # _MEMORY_TOOL_NAMES imported from mcp_cli.memory.tools (single source of truth) +# _PLAN_TOOL_NAMES imported from mcp_cli.planning.tools (single source of truth) class ToolProcessor: @@ -205,6 +207,15 @@ async def process_tool_calls( ) continue + # ── Plan tool interception ───────────────────────────── + # plan_create, plan_execute, plan_create_and_execute are + # internal planning ops — not routed to MCP ToolManager. + if execution_tool_name in _PLAN_TOOL_NAMES: + await self._handle_plan_tool( + execution_tool_name, arguments, llm_tool_name, call_id + ) + continue + # DEBUG: Log exactly what the model sent for this tool call logger.info(f"TOOL CALL FROM MODEL: {llm_tool_name} id={call_id}") logger.info(f" raw_arguments: {raw_arguments}") @@ -653,6 +664,50 @@ async def _handle_memory_tool( self._add_tool_result_to_history(llm_tool_name, call_id, result_text) + async def _handle_plan_tool( + self, + tool_name: str, + arguments: dict, + llm_tool_name: str, + call_id: str, + ) -> None: + """Execute a plan tool (plan_create, plan_execute, plan_create_and_execute). + + Plan tools are internal operations that bypass the MCP ToolManager + and all guard checks. They use PlanningContext to generate and + execute multi-step plans. + """ + if not getattr(self.context, "_enable_plan_tools", False): + self._add_tool_result_to_history( + llm_tool_name, call_id, "Plan tools are not enabled." 
+ ) + return + + logger.info("Plan tool %s called with args: %s", tool_name, arguments) + + from mcp_cli.planning.context import PlanningContext + from mcp_cli.planning.tools import handle_plan_tool + + # Lazy-create PlanningContext (cached on context object) + planning_context = getattr(self.context, "_planning_context", None) + if planning_context is None: + planning_context = PlanningContext(self.context.tool_manager) + self.context._planning_context = planning_context + + # Get model_manager for LLM-driven step execution + model_manager = getattr(self.context, "model_manager", None) + + # Pass the UI manager so handle_plan_tool can show step-by-step progress + result_text = await handle_plan_tool( + tool_name, + arguments, + planning_context, + model_manager, + ui_manager=self.ui_manager, + ) + + self._add_tool_result_to_history(llm_tool_name, call_id, result_text) + async def _handle_vm_tool( self, tool_name: str, diff --git a/src/mcp_cli/commands/__init__.py b/src/mcp_cli/commands/__init__.py index d3785f26..4992a150 100644 --- a/src/mcp_cli/commands/__init__.py +++ b/src/mcp_cli/commands/__init__.py @@ -108,6 +108,7 @@ def register_all_commands() -> None: from mcp_cli.commands.sessions import SessionsCommand from mcp_cli.commands.apps import AppsCommand from mcp_cli.commands.memory import MemoryCommand + from mcp_cli.commands.plan import PlanCommand # Register basic commands registry.register(HelpCommand()) @@ -155,6 +156,9 @@ def register_all_commands() -> None: # Register VM visualization command (chat mode only) registry.register(MemoryCommand()) + # Register plan command + registry.register(PlanCommand()) + # All commands have been migrated! 
# - tools (with subcommands: list, call, confirm) # - provider (with subcommands: list, set, show) diff --git a/src/mcp_cli/commands/plan/__init__.py b/src/mcp_cli/commands/plan/__init__.py new file mode 100644 index 00000000..6511bfd5 --- /dev/null +++ b/src/mcp_cli/commands/plan/__init__.py @@ -0,0 +1,5 @@ +"""Plan command.""" + +from mcp_cli.commands.plan.plan import PlanCommand + +__all__ = ["PlanCommand"] diff --git a/src/mcp_cli/commands/plan/plan.py b/src/mcp_cli/commands/plan/plan.py new file mode 100644 index 00000000..a9640f81 --- /dev/null +++ b/src/mcp_cli/commands/plan/plan.py @@ -0,0 +1,482 @@ +# src/mcp_cli/commands/plan/plan.py +"""Plan management command — create, list, show, run, delete, resume plans.""" + +from __future__ import annotations + +import logging + +from mcp_cli.commands.base import ( + UnifiedCommand, + CommandMode, + CommandParameter, + CommandResult, +) +from mcp_cli.config.enums import PlanAction + +logger = logging.getLogger(__name__) + +# Module-level cache: tool_manager id → PlanningContext +_planning_context_cache: dict[int, object] = {} + + +class PlanCommand(UnifiedCommand): + """Manage execution plans.""" + + @property + def name(self) -> str: + return "plan" + + @property + def aliases(self) -> list[str]: + return ["plans"] + + @property + def description(self) -> str: + return "Create, inspect, and execute plans" + + @property + def help_text(self) -> str: + return """ +Manage execution plans — reproducible, inspectable tool call graphs. 
+ +Usage: + /plan - List all saved plans + /plan list - List all saved plans + /plan create <description> - Generate a plan from a description + /plan show <plan_id> - Show plan details + /plan run <plan_id> - Execute a plan + /plan run <plan_id> --dry-run - Trace without executing + /plan delete <plan_id> - Delete a plan + /plan resume <plan_id> - Resume an interrupted plan +""" + + @property + def modes(self) -> CommandMode: + return CommandMode.ALL + + @property + def parameters(self) -> list[CommandParameter]: + return [ + CommandParameter( + name="action", + type=str, + required=False, + help="Action: create, list, show, run, delete, resume", + ), + CommandParameter( + name="plan_id_or_description", + type=str, + required=False, + help="Plan ID or description (for create)", + ), + ] + + async def execute(self, **kwargs) -> CommandResult: + """Execute the plan command.""" + # Parse args — chat adapter passes a list, interactive passes a string + args_val = kwargs.get("args", "") + if isinstance(args_val, list): + args_str = " ".join(str(a) for a in args_val) + else: + args_str = str(args_val).strip() + parts = args_str.split(maxsplit=1) + action = parts[0] if parts else PlanAction.LIST + remainder = parts[1] if len(parts) > 1 else "" + + # Get tool_manager from kwargs + tool_manager = kwargs.get("tool_manager") + if not tool_manager: + return CommandResult( + success=False, + error="Tool manager not available. Plans require an active session.", + ) + + # Lazy-create planning context + planning_context = await self._get_planning_context(tool_manager) + + if action == PlanAction.LIST: + return await self._list_plans(planning_context) + + elif action == PlanAction.CREATE: + if not remainder: + return CommandResult( + success=False, + error="Description required. Usage: /plan create <description>", + ) + return await self._create_plan(planning_context, remainder, kwargs) + + elif action == PlanAction.SHOW: + if not remainder: + return CommandResult( + success=False, + error="Plan ID required.
Usage: /plan show <plan_id>", + ) + return await self._show_plan(planning_context, remainder) + + elif action == PlanAction.RUN: + if not remainder: + return CommandResult( + success=False, + error="Plan ID required. Usage: /plan run <plan_id>", + ) + dry_run = "--dry-run" in remainder or "--simulate" in remainder + plan_id = remainder.split()[0] + return await self._run_plan( + planning_context, plan_id, dry_run=dry_run, kwargs=kwargs + ) + + elif action == PlanAction.DELETE: + if not remainder: + return CommandResult( + success=False, + error="Plan ID required. Usage: /plan delete <plan_id>", + ) + return await self._delete_plan(planning_context, remainder.strip()) + + elif action == PlanAction.RESUME: + if not remainder: + return CommandResult( + success=False, + error="Plan ID required. Usage: /plan resume <plan_id>", + ) + return await self._resume_plan(planning_context, remainder.strip()) + + else: + return CommandResult( + success=False, + error=f"Unknown action: {action}. Use create, list, show, run, delete, or resume.", + ) + + async def _get_planning_context(self, tool_manager): + """Get or create PlanningContext.""" + from mcp_cli.planning.context import PlanningContext + + tm_id = id(tool_manager) + if tm_id not in _planning_context_cache: + _planning_context_cache[tm_id] = PlanningContext(tool_manager) + return _planning_context_cache[tm_id] + + async def _list_plans(self, context) -> CommandResult: + """List all saved plans.""" + from chuk_term.ui import output, format_table + + plans = await context.list_plans() + if not plans: + output.info("No saved plans.
Use /plan create <description> to create one.") + return CommandResult(success=True, output="No saved plans.") + + table_data = [] + for p in plans: + step_count = len(p.get("steps", [])) + table_data.append( + { + "ID": p.get("id", "?")[:8] + "...", + "Title": p.get("title", "Untitled")[:50], + "Steps": str(step_count), + } + ) + + table = format_table( + table_data, + title="Saved Plans", + columns=["ID", "Title", "Steps"], + ) + output.print_table(table) + return CommandResult(success=True, data=table_data) + + async def _create_plan(self, context, description: str, kwargs) -> CommandResult: + """Generate a plan from a natural language description.""" + from chuk_term.ui import output + + output.info(f"Generating plan: {description}") + + try: + # Get full tool catalog (with parameter schemas) for the system prompt + tool_catalog = await context.get_tool_catalog() + tool_names = [ + t.get("function", {}).get("name", "") + for t in tool_catalog + if t.get("function", {}).get("name") + ] + if not tool_names: + return CommandResult( + success=False, + error="No tools available.
Connect to MCP servers first.", + ) + + # Build system prompt with tool schemas + system_prompt = _build_plan_system_prompt(tool_catalog) + + # Use PlanAgent to generate the plan + from chuk_ai_planner.agents.plan_agent import PlanAgent + + agent = PlanAgent( + system_prompt=system_prompt, + validate_step=lambda step: _validate_step(step, tool_names), + max_retries=2, + ) + + plan_dict = await agent.plan(description) + + if not plan_dict or not plan_dict.get("steps"): + return CommandResult( + success=False, + error="Failed to generate a valid plan.", + ) + + # Save via PlanningContext (builds UniversalPlan + registers) + plan_id = await context.save_plan_from_dict(plan_dict) + + # Display the saved plan (has patched 1-based dependencies) + saved_plan = await context.get_plan(plan_id) + _display_plan(saved_plan or plan_dict) + + output.success(f"Plan saved: {plan_id[:8]}...") + return CommandResult( + success=True, + output=f"Plan '{plan_dict.get('title', 'Untitled')}' created with {len(plan_dict['steps'])} steps.", + data={"plan_id": plan_id}, + ) + + except Exception as e: + logger.error("Plan creation failed: %s", e) + return CommandResult( + success=False, + error=f"Plan creation failed: {e}", + ) + + async def _show_plan(self, context, plan_id: str) -> CommandResult: + """Show details of a plan.""" + plan_data = await context.get_plan(plan_id) + if not plan_data: + return CommandResult( + success=False, + error=f"Plan not found: {plan_id}", + ) + + _display_plan(plan_data) + return CommandResult(success=True, data=plan_data) + + async def _run_plan( + self, + context, + plan_id: str, + *, + dry_run: bool = False, + kwargs: dict | None = None, + ) -> CommandResult: + """Execute a plan.""" + from chuk_term.ui import output + from mcp_cli.planning.executor import PlanRunner + + plan_data = await context.get_plan(plan_id) + if not plan_data: + return CommandResult( + success=False, + error=f"Plan not found: {plan_id}", + ) + + mode_label = "[DRY RUN] " if dry_run 
else "" + output.info(f"{mode_label}Executing plan: {plan_data.get('title', 'Untitled')}") + + # Get display manager for tool execution rendering (matches regular chat display) + ui_manager = (kwargs or {}).get("ui_manager") + display = getattr(ui_manager, "display", None) if ui_manager else None + + def on_step_start(index, title, tool): + output.info(f" Step {index}: {title}") + + def on_step_complete(step_result): + # Tool results are shown by on_tool_complete — step complete is a summary + if not step_result.success: + output.error( + f" Step {step_result.step_index} failed: {step_result.error}" + ) + + async def on_tool_start(tool_name, arguments): + if display: + await display.start_tool_execution(tool_name, arguments) + + async def on_tool_complete(tool_name, result_text, success, elapsed): + if display: + await display.stop_tool_execution(result_text, success) + + # Get model_manager for LLM-driven execution + model_manager = (kwargs or {}).get("model_manager") + + runner = PlanRunner( + context, + model_manager=model_manager, + on_step_start=on_step_start, + on_step_complete=on_step_complete, + on_tool_start=on_tool_start, + on_tool_complete=on_tool_complete, + ) + + result = await runner.execute_plan(plan_data, dry_run=dry_run) + + if result.success: + output.success( + f"Plan completed: {len(result.steps)} steps in {result.total_duration:.1f}s" + ) + else: + output.error(f"Plan failed: {result.error or 'unknown error'}") + + return CommandResult( + success=result.success, + output=f"{'[DRY RUN] ' if dry_run else ''}Plan {'completed' if result.success else 'failed'}", + data={ + "plan_result": { + "success": result.success, + "steps": len(result.steps), + "duration": result.total_duration, + "variables": result.variables, + } + }, + ) + + async def _delete_plan(self, context, plan_id: str) -> CommandResult: + """Delete a plan.""" + from chuk_term.ui import output + + if await context.delete_plan(plan_id): + output.success(f"Plan deleted: {plan_id}") + 
return CommandResult(success=True, output=f"Deleted {plan_id}") + return CommandResult( + success=False, + error=f"Plan not found: {plan_id}", + ) + + async def _resume_plan(self, context, plan_id: str) -> CommandResult: + """Resume an interrupted plan.""" + from chuk_term.ui import output + from mcp_cli.planning.executor import PlanRunner + + plan_data = await context.get_plan(plan_id) + if not plan_data: + return CommandResult( + success=False, + error=f"Plan not found: {plan_id}", + ) + + runner = PlanRunner(context) + checkpoint = runner.load_checkpoint(plan_id) + + if not checkpoint: + return CommandResult( + success=False, + error=f"No checkpoint found for plan {plan_id}. Use /plan run instead.", + ) + + completed = checkpoint.get("completed_steps", []) + output.info( + f"Resuming plan: {plan_data.get('title', 'Untitled')} " + f"({len(completed)} steps already completed)" + ) + + # Filter out completed steps + remaining_steps = [ + s for s in plan_data.get("steps", []) if s.get("index") not in completed + ] + plan_data = dict(plan_data) # Don't mutate the original + plan_data["steps"] = remaining_steps + + result = await runner.execute_plan( + plan_data, + variables=checkpoint.get("variables", {}), + ) + + if result.success: + output.success("Plan resumed and completed successfully.") + else: + output.error(f"Plan resume failed: {result.error}") + + return CommandResult(success=result.success) + + +def _build_plan_system_prompt(tool_catalog: list[dict]) -> str: + """Build the system prompt for LLM plan generation. + + Args: + tool_catalog: Full tool definitions with schemas (OpenAI function format). 
+ """ + + # Format each tool with its parameter schema + tool_lines = [] + for tool in tool_catalog: + func = tool.get("function", {}) + name = func.get("name", "?") + desc = func.get("description", "") + params = func.get("parameters", {}) + props = params.get("properties", {}) + required = params.get("required", []) + + param_parts = [] + for pname, pinfo in props.items(): + ptype = pinfo.get("type", "any") + pdesc = pinfo.get("description", "") + req = " (required)" if pname in required else "" + param_parts.append(f" {pname}: {ptype}{req} — {pdesc}") + + params_str = "\n".join(param_parts) if param_parts else " (no parameters)" + tool_lines.append(f" {name}: {desc}\n Parameters:\n{params_str}") + + tools_text = "\n\n".join(tool_lines) + + return f"""You are a planning assistant. Given a task description, create a structured execution plan. + +Available tools (with parameter schemas): + +{tools_text} + +Output a JSON object with this exact structure: +{{ + "title": "Short plan title", + "steps": [ + {{ + "title": "What this step does", + "tool": "tool_name", + "args": {{"arg1": "value1"}}, + "depends_on": [], + "result_variable": "optional_var_name" + }} + ] +}} + +Rules: +- Only use tools from the available tools list above +- Use the EXACT parameter names shown in the tool schemas +- depends_on is a list of step indices (0-based) that must complete first +- result_variable stores the output for use in later steps as ${{var_name}} +- Keep plans focused — prefer fewer, targeted steps over many small ones +- Each step should have exactly one tool call""" + + +def _validate_step(step: dict, tool_names: list[str]) -> tuple[bool, str]: + """Validate a plan step against available tools.""" + tool = step.get("tool", "") + if tool not in tool_names: + return False, f"Unknown tool: {tool}. 
Available: {', '.join(tool_names[:10])}" + if not step.get("title"): + return False, "Step must have a title" + return True, "" + + +def _display_plan(plan_data: dict) -> None: + """Display a plan in the terminal with DAG visualization.""" + from chuk_term.ui import output + from mcp_cli.planning.executor import render_plan_dag + + title = plan_data.get("title", "Untitled Plan") + steps = plan_data.get("steps", []) + + output.info(f"\nPlan: {title} ({len(steps)} steps)\n") + + # Render DAG + dag = render_plan_dag(plan_data) + output.info(dag) + + result_vars = [s.get("result_variable") for s in steps if s.get("result_variable")] + if result_vars: + output.info(f"\nVariables: {', '.join(result_vars)}") + output.info("") diff --git a/src/mcp_cli/config/defaults.py b/src/mcp_cli/config/defaults.py index 0219ead1..2f034f17 100644 --- a/src/mcp_cli/config/defaults.py +++ b/src/mcp_cli/config/defaults.py @@ -343,6 +343,38 @@ """Maximum characters for memory section in system prompt.""" +# ================================================================ +# Planning Defaults (Tier 6) +# ================================================================ + +DEFAULT_PLANS_DIR = "~/.mcp-cli/plans" +"""Default directory for plan persistence.""" + +DEFAULT_ENABLE_PLAN_TOOLS = False +"""Enable plan_create / plan_execute / plan_create_and_execute as LLM-callable tools.""" + +DEFAULT_PLAN_MAX_CONCURRENCY = 4 +"""Maximum concurrent steps within a parallel batch.""" + +DEFAULT_PLAN_MAX_REPLANS = 2 +"""Maximum number of re-plan attempts on step failure.""" + +DEFAULT_PLAN_MAX_STEP_RETRIES = 2 +"""Maximum LLM retry attempts per plan step on tool failure.""" + +DEFAULT_PLAN_VARIABLE_SUMMARY_MAX_CHARS = 500 +"""Maximum characters per variable in LLM variable summary.""" + +DEFAULT_PLAN_CHECKPOINT_MAX_CHARS = 1000 +"""Maximum characters per variable in checkpoint serialization.""" + +DEFAULT_PLAN_ERROR_MESSAGE_MAX_CHARS = 200 +"""Maximum characters for error messages in plan execution 
results.""" + +DEFAULT_PLAN_DAG_TITLE_MAX_CHARS = 35 +"""Maximum characters for step titles in DAG visualization.""" + + # ================================================================ # Logging Defaults # ================================================================ diff --git a/src/mcp_cli/config/enums.py b/src/mcp_cli/config/enums.py index 2028ab8f..e0c69cca 100644 --- a/src/mcp_cli/config/enums.py +++ b/src/mcp_cli/config/enums.py @@ -122,6 +122,26 @@ class SessionAction(str, Enum): DELETE = "delete" +class PlanAction(str, Enum): + """Actions for /plan command.""" + + CREATE = "create" + LIST = "list" + SHOW = "show" + RUN = "run" + DELETE = "delete" + RESUME = "resume" + + +class PlanStatus(str, Enum): + """Plan execution status values.""" + + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + + class ServerAction(str, Enum): """Actions for /server command.""" @@ -158,6 +178,8 @@ class ThemeAction(str, Enum): "ServerStatus", # Command action enums "ConversationAction", + "PlanAction", + "PlanStatus", "SessionAction", "TokenAction", "ServerAction", diff --git a/src/mcp_cli/main.py b/src/mcp_cli/main.py index bd07ee87..99c41e3f 100644 --- a/src/mcp_cli/main.py +++ b/src/mcp_cli/main.py @@ -145,6 +145,11 @@ def main_callback( "--health-interval", help="Background server health check interval in seconds (0 = disabled)", ), + plan_tools: bool = typer.Option( + False, + "--plan-tools", + help="Enable plan_create/plan_execute as LLM-callable tools for autonomous multi-step planning", + ), ) -> None: """MCP CLI - If no subcommand is given, start chat mode.""" @@ -371,6 +376,7 @@ async def _start_chat(): vm_mode=vm_mode, vm_budget=vm_budget, health_interval=health_interval, + enable_plan_tools=plan_tools, ) logger.debug(f"Chat mode completed with success: {success}") except asyncio.TimeoutError: @@ -470,6 +476,11 @@ def _chat_command( "--health-interval", help="Background server health check interval in seconds (0 = 
 disabled)",
     ),
+    plan_tools: bool = typer.Option(
+        False,
+        "--plan-tools",
+        help="Enable plan_create/plan_execute as LLM-callable tools for autonomous multi-step planning",
+    ),
 ) -> None:
     """Start chat mode (same as default behavior without subcommand)."""
     # Re-configure logging based on user options
@@ -604,6 +615,7 @@ async def _start_chat():
                 vm_mode=vm_mode,
                 vm_budget=vm_budget,
                 health_interval=health_interval,
+                enable_plan_tools=plan_tools,
             )
             logger.debug(f"Chat mode completed with success: {success}")
         except asyncio.TimeoutError:
diff --git a/src/mcp_cli/planning/__init__.py b/src/mcp_cli/planning/__init__.py
new file mode 100644
index 00000000..f0b0cd25
--- /dev/null
+++ b/src/mcp_cli/planning/__init__.py
@@ -0,0 +1,32 @@
+# src/mcp_cli/planning/__init__.py
+"""Plan-based execution for mcp-cli.
+
+Integrates chuk-ai-planner's graph-based plan DSL with mcp-cli's
+MCP tool execution layer. Plans make workflows reproducible,
+inspectable, and resumable.
+
+Key components:
+- McpToolBackend: bridges planner to ToolManager with guard integration
+- PlanningContext: state container for plan operations
+- PlanRunner: orchestrates plan execution with parallel batches, checkpointing, re-planning
+- render_plan_dag: ASCII DAG visualization for terminal display
+- PlanExecutionResult / StepResult: structured execution results
+"""
+
+from mcp_cli.planning.backends import McpToolBackend
+from mcp_cli.planning.context import PlanningContext
+from mcp_cli.planning.executor import (
+    PlanRunner,
+    PlanExecutionResult,
+    StepResult,
+    render_plan_dag,
+)
+
+__all__ = [
+    "McpToolBackend",
+    "PlanningContext",
+    "PlanRunner",
+    "PlanExecutionResult",
+    "StepResult",
+    "render_plan_dag",
+]
diff --git a/src/mcp_cli/planning/backends.py b/src/mcp_cli/planning/backends.py
new file mode 100644
index 00000000..62acfc78
--- /dev/null
+++ b/src/mcp_cli/planning/backends.py
@@ -0,0 +1,387 @@
+# src/mcp_cli/planning/backends.py
+"""McpToolBackend — bridges chuk-ai-planner to mcp-cli's ToolManager.
+
+The planner defines a ToolExecutionBackend protocol. The existing
+ToolProcessorBackend calls CTP.process() with fake OpenAI payloads,
+which works for registered Python functions. This backend instead calls
+ToolManager.execute_tool(), routing to real MCP servers.
+
+Guard integration: before each tool call, checks mcp-cli's guard system
+(budget, runaway, per-tool limits). After each call, records the result
+for value binding and budget tracking.
+
+Same protocol interface, different execution path.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import time
+from typing import TYPE_CHECKING, Any
+
+from chuk_ai_planner.execution.models import (
+    ToolExecutionRequest,
+    ToolExecutionResult,
+)
+
+from mcp_cli.config.defaults import DEFAULT_PLAN_ERROR_MESSAGE_MAX_CHARS
+from mcp_cli.llm.content_models import ContentBlockType
+
+if TYPE_CHECKING:
+    from mcp_cli.tools.manager import ToolManager
+
+logger = logging.getLogger(__name__)
+
+
+class McpToolBackend:
+    """Planner → mcp-cli ToolManager adapter with guard integration.
+
+    Implements the ToolExecutionBackend protocol so that
+    chuk-ai-planner's UniversalExecutor can execute tools
+    on real MCP servers via mcp-cli's ToolManager.
+
+    Guard checks (budget, per-tool limits, runaway detection) are
+    enforced before each call. Results are recorded for value binding
+    and budget tracking after each call.
+    """
+
+    def __init__(
+        self,
+        tool_manager: ToolManager,
+        *,
+        namespace: str | None = None,
+        enable_guards: bool = True,
+    ) -> None:
+        """Initialize the MCP tool backend.
+
+        Args:
+            tool_manager: The ToolManager instance for MCP tool execution.
+            namespace: Optional namespace prefix for tool names.
+            enable_guards: If True, check guards before each tool call.
+        """
+        self._tool_manager = tool_manager
+        self._namespace = namespace
+        self._enable_guards = enable_guards
+
+    async def execute_tool(self, request: ToolExecutionRequest) -> ToolExecutionResult:
+        """Execute a tool via mcp-cli's ToolManager with guard checks.
+
+        Args:
+            request: Planner's execution request (tool_name, args, step_id).
+
+        Returns:
+            ToolExecutionResult with the tool output or error.
+        """
+        start_time = time.perf_counter()
+
+        # Apply namespace if configured
+        tool_name = (
+            f"{self._namespace}__{request.tool_name}"
+            if self._namespace
+            else request.tool_name
+        )
+
+        logger.debug(
+            "Plan step %s: executing tool %s with args %s",
+            request.step_id,
+            tool_name,
+            list(request.args.keys()),
+        )
+
+        # --- Guard checks (pre-execution) ---
+        if self._enable_guards:
+            guard_error = _check_guards(tool_name, request.args)
+            if guard_error:
+                duration = time.perf_counter() - start_time
+                logger.warning(
+                    "Plan step %s: tool %s blocked by guard: %s",
+                    request.step_id,
+                    tool_name,
+                    guard_error,
+                )
+                return ToolExecutionResult(
+                    tool_name=request.tool_name,
+                    result=None,
+                    error=f"Guard blocked: {guard_error}",
+                    duration=duration,
+                    cached=False,
+                )
+
+        try:
+            # Execute through ToolManager → StreamManager → MCP server
+            result = await self._tool_manager.execute_tool(
+                tool_name,
+                request.args,
+                namespace=self._namespace,
+            )
+
+            duration = time.perf_counter() - start_time
+
+            # ToolManager marks success=True even when the MCP server
+            # returns an error payload (JSON-RPC error, isError flag).
+            # Detect these false-positive successes here.
+            actual_success = result.success and not _is_error_result(result.result)
+
+            if actual_success:
+                extracted = _extract_result(result.result)
+
+                # --- Post-execution recording ---
+                if self._enable_guards:
+                    _record_result(tool_name, request.args, extracted)
+
+                logger.debug(
+                    "Plan step %s: tool %s completed in %.2fs",
+                    request.step_id,
+                    tool_name,
+                    duration,
+                )
+                return ToolExecutionResult(
+                    tool_name=request.tool_name,
+                    result=extracted,
+                    error=None,
+                    duration=duration,
+                    cached=False,
+                )
+            else:
+                error_msg = (
+                    result.error
+                    or _extract_error_message(result.result)
+                    or "Tool execution failed"
+                )
+                logger.warning(
+                    "Plan step %s: tool %s failed: %s",
+                    request.step_id,
+                    tool_name,
+                    error_msg,
+                )
+                return ToolExecutionResult(
+                    tool_name=request.tool_name,
+                    result=None,
+                    error=error_msg,
+                    duration=duration,
+                    cached=False,
+                )
+
+        except Exception as e:
+            duration = time.perf_counter() - start_time
+            logger.error(
+                "Plan step %s: tool %s raised exception: %s",
+                request.step_id,
+                tool_name,
+                e,
+            )
+            return ToolExecutionResult(
+                tool_name=request.tool_name,
+                result=None,
+                error=str(e),
+                duration=duration,
+                cached=False,
+            )
+
+
+def _check_guards(tool_name: str, arguments: dict[str, Any]) -> str | None:
+    """Run mcp-cli's guard checks before tool execution.
+
+    Returns an error message if blocked, None if allowed.
+    """
+    try:
+        from chuk_ai_session_manager.guards import get_tool_state
+
+        tool_state = get_tool_state()
+        if tool_state is None:
+            return None
+
+        # Per-tool cap check (only meaningful when a cap is configured)
+        if hasattr(tool_state, "limits") and tool_state.limits.per_tool_cap > 0:
+            cap_result = tool_state.check_per_tool_limit(tool_name)
+            if cap_result.blocked:
+                return cap_result.reason or f"Per-tool limit reached for {tool_name}"
+
+        # Budget check via check_all_guards (runs precondition + budget + ungrounded)
+        guard_result = tool_state.check_all_guards(tool_name, arguments)
+        if guard_result.blocked:
+            return guard_result.reason or "Guard check failed"
+
+        return None
+
+    except ImportError:
+        logger.debug("Guards not available (chuk_ai_session_manager not installed)")
+        return None
+    except Exception as e:
+        logger.debug("Guard check failed (non-fatal): %s", e)
+        return None
+
+
+def _record_result(tool_name: str, arguments: dict[str, Any], result: Any) -> None:
+    """Record tool execution result in the guard system.
+
+    Binds the result as a $vN value and increments budget counters.
+    """
+    try:
+        from chuk_ai_session_manager.guards import get_tool_state
+
+        tool_state = get_tool_state()
+        if tool_state is None:
+            return
+
+        # Bind result as $vN for reference in subsequent tools
+        tool_state.bind_value(tool_name, arguments, result)
+
+        # Record for budget tracking
+        tool_state.record_tool_call(tool_name)
+
+        # Feed numeric results to runaway guard
+        if isinstance(result, (int, float)):
+            tool_state.record_numeric_result(float(result))
+
+    except ImportError:
+        pass
+    except Exception as e:
+        logger.debug("Result recording failed (non-fatal): %s", e)
+
+
+def _is_error_result(raw: Any) -> bool:
+    """Check if a ToolManager result is actually an error.
+
+    ToolManager.execute_tool marks success=True when stream_manager.call_tool
+    returns without exception, even if the MCP server responded with an error.
+    Detect these false-positive successes.
+
+    When CTP middleware is enabled, StreamManager.call_tool() returns a
+    chuk_tool_processor ToolExecutionResult object (not a dict). ToolManager
+    wraps this as ToolCallResult(success=True, result=<ToolExecutionResult>).
+    We detect these by checking for a 'success' attribute set to False.
+    """
+    if raw is None:
+        return False
+
+    # CTP ToolExecutionResult or similar objects with success=False
+    # (from chuk_tool_processor.mcp.middleware when middleware is enabled)
+    if hasattr(raw, "success") and hasattr(raw, "error"):
+        if not raw.success:
+            return True
+
+    # MCP CallToolResult with isError flag (object or dict)
+    if hasattr(raw, "isError") and raw.isError:
+        return True
+    if isinstance(raw, dict):
+        if raw.get("isError"):
+            return True
+        # Check nested content for isError
+        if "content" in raw and hasattr(raw["content"], "isError"):
+            if raw["content"].isError:
+                return True
+
+    # MCP error content blocks
+    if isinstance(raw, list):
+        for block in raw:
+            if isinstance(block, dict) and block.get("isError"):
+                return True
+
+    return False
+
+
+def _extract_error_message(raw: Any) -> str | None:
+    """Extract a human-readable error message from an MCP error result."""
+    if raw is None:
+        return None
+
+    # CTP ToolExecutionResult with .error attribute
+    if hasattr(raw, "error") and raw.error:
+        return str(raw.error)
+
+    # Extract text from MCP content blocks
+    if isinstance(raw, list):
+        for block in raw:
+            if isinstance(block, dict) and block.get("type") == ContentBlockType.TEXT:
+                return str(block.get("text", ""))
+
+    text = str(raw)
+    if len(text) > DEFAULT_PLAN_ERROR_MESSAGE_MAX_CHARS:
+        text = text[:DEFAULT_PLAN_ERROR_MESSAGE_MAX_CHARS] + "..."
+    return text
+
+
+def _extract_result(raw: Any) -> Any:
+    """Extract a clean result value from ToolCallResult.result.
+
+    MCP tool results come in several forms depending on the execution path:
+
+    1. CTP middleware: ToolExecutionResult(success, result, error)
+    2. MCP dict wrapper: {"isError": False, "content": ToolResult(...)}
+    3. MCP ToolResult object: ToolResult(content=[{type, text}, ...])
+    4. Content block list: [{"type": "text", "text": "..."}, ...]
+    5. JSON string: '{"results": [...]}'
+    6. Plain value: string, int, etc.
+
+    This function unwraps all layers to return clean, usable data.
+    JSON strings are parsed into dicts/lists for easier downstream use.
+    """
+    if raw is None:
+        return None
+
+    # Unwrap CTP ToolExecutionResult (has success, result, error attrs)
+    if hasattr(raw, "success") and hasattr(raw, "result") and hasattr(raw, "error"):
+        if raw.success:
+            return _extract_result(raw.result)  # Recurse to handle nested results
+        return None  # Error case — caller should check _is_error_result first
+
+    # Dict with "content" key (MCP CallToolResult as dict)
+    # e.g. {"isError": False, "content": ToolResult(content=[...])}
+    if isinstance(raw, dict) and "content" in raw:
+        return _extract_result(raw["content"])
+
+    # Object with .content attribute (MCP ToolResult / CallToolResult)
+    # e.g. ToolResult(content=[{"type": "text", "text": "..."}])
+    if hasattr(raw, "content") and not isinstance(raw, (str, bytes)):
+        content = raw.content
+        if isinstance(content, list):
+            return _extract_content_blocks(content)
+        return _extract_result(content)
+
+    # List of content blocks (MCP style)
+    if isinstance(raw, list):
+        return _extract_content_blocks(raw)
+
+    # JSON string → parse to dict/list for cleaner variable access
+    if isinstance(raw, str):
+        return _try_parse_json(raw)
+
+    return raw
+
+
+def _extract_content_blocks(blocks: list) -> Any:
+    """Extract text from a list of MCP content blocks.
+
+    Content blocks can be dicts or objects with type/text attributes.
+    Returns parsed JSON if the text is valid JSON, otherwise raw text.
+    """
+    texts = []
+    for block in blocks:
+        # Dict content block: {"type": "text", "text": "..."}
+        if isinstance(block, dict) and block.get("type") == ContentBlockType.TEXT:
+            texts.append(block.get("text", ""))
+        # Object content block: block.type == "text", block.text == "..."
+        elif hasattr(block, "type") and hasattr(block, "text"):
+            if str(block.type) == ContentBlockType.TEXT:
+                texts.append(str(block.text))
+        elif isinstance(block, str):
+            texts.append(block)
+
+    if texts:
+        combined = "\n".join(texts) if len(texts) > 1 else texts[0]
+        return _try_parse_json(combined)
+
+    return blocks  # Return raw if no text blocks found
+
+
+def _try_parse_json(text: str) -> Any:
+    """Try to parse a string as JSON. Returns parsed value or original string."""
+    if not text or not text.strip():
+        return text
+    try:
+        return json.loads(text)
+    except (json.JSONDecodeError, TypeError, ValueError):
+        return text
diff --git a/src/mcp_cli/planning/context.py b/src/mcp_cli/planning/context.py
new file mode 100644
index 00000000..32e8bb0e
--- /dev/null
+++ b/src/mcp_cli/planning/context.py
@@ -0,0 +1,365 @@
+# src/mcp_cli/planning/context.py
+"""PlanningContext — state container for plan operations.
+
+Holds the graph store, plan registry, tool manager reference,
+and provides convenience methods for plan CRUD and tool catalog access.
+
+Wraps chuk-ai-planner's async PlanRegistry API, converting between
+UniversalPlan objects and plain dicts for the command layer.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from chuk_ai_planner.core.store.memory import InMemoryGraphStore
+from chuk_ai_planner.core.planner.plan_registry import PlanRegistry
+from chuk_ai_planner.core.planner.universal_plan import UniversalPlan
+
+from mcp_cli.config.defaults import DEFAULT_PLANS_DIR
+
+if TYPE_CHECKING:
+    from mcp_cli.tools.manager import ToolManager
+
+logger = logging.getLogger(__name__)
+
+
+class PlanningContext:
+    """State container for plan operations.
+
+    Centralizes access to the graph store, plan registry,
+    and tool manager. Passed to the PlanRunner and PlanCommand.
+    """
+
+    def __init__(
+        self,
+        tool_manager: ToolManager,
+        *,
+        plans_dir: Path | None = None,
+    ) -> None:
+        """Initialize planning context.
+
+        Args:
+            tool_manager: The ToolManager for MCP tool execution and catalog.
+            plans_dir: Directory for plan persistence. Defaults to ~/.mcp-cli/plans/
+        """
+        self.tool_manager = tool_manager
+        self.plans_dir = plans_dir or Path(DEFAULT_PLANS_DIR).expanduser()
+        self.graph_store = InMemoryGraphStore()
+        self.plan_registry = PlanRegistry(str(self.plans_dir))
+
+        # Ensure plans directory exists
+        self.plans_dir.mkdir(parents=True, exist_ok=True)
+
+        logger.debug("PlanningContext initialized, plans_dir=%s", self.plans_dir)
+
+    async def get_tool_catalog(self, provider: str = "openai") -> list[dict[str, Any]]:
+        """Get the available tool catalog for LLM plan generation.
+
+        Args:
+            provider: LLM provider format for tool adaptation.
+
+        Returns:
+            List of tool definitions with name, description, and parameters.
+        """
+        try:
+            result = await self.tool_manager.get_adapted_tools_for_llm(provider)
+            # get_adapted_tools_for_llm returns (tools, namespace_map) tuple
+            tools: list[dict[str, Any]] = (
+                result[0] if isinstance(result, tuple) else result
+            )
+            return tools
+        except Exception as e:
+            logger.warning("Failed to get tool catalog: %s", e)
+            return []
+
+    async def get_tool_names(self) -> list[str]:
+        """Get list of available tool names for plan validation.
+
+        Returns:
+            List of tool name strings.
+        """
+        try:
+            all_tools = await self.tool_manager.get_all_tools()
+            return [t.name for t in all_tools]
+        except Exception as e:
+            logger.warning("Failed to get tool names: %s", e)
+            return []
+
+    def _is_plan_id(self, registry_key: str) -> bool:
+        """Check if a registry key is a real plan ID (not a checkpoint/state)."""
+        return not registry_key.endswith("_state")
+
+    def _plan_ids(self) -> set[str]:
+        """Get all known plan IDs from both in-memory registry and disk.
+
+        Excludes checkpoint/state files. Combines both sources because
+        in-memory plans (created this session) may not be on disk yet,
+        and disk plans (from previous sessions) may not be loaded yet.
+        """
+        known: set[str] = set()
+
+        # In-memory registry keys
+        for key in self.plan_registry.plans:
+            if self._is_plan_id(key):
+                known.add(key)
+
+        # Plan files on disk ({plan_id}.json)
+        if self.plans_dir.exists():
+            for path in self.plans_dir.glob("*.json"):
+                file_id = path.stem
+                if self._is_plan_id(file_id):
+                    known.add(file_id)
+
+        return known
+
+    async def list_plans(self) -> list[dict[str, Any]]:
+        """List all saved plans as dicts.
+
+        Returns:
+            List of full plan dicts (id, title, steps, variables).
+        """
+        try:
+            # Load all plans (populates the registry's in-memory cache)
+            await self.plan_registry.get_all_plans()
+
+            # Only return real plans, not checkpoint/state entries
+            valid_ids = self._plan_ids()
+            result = []
+            for pid in sorted(valid_ids):
+                plan = await self.plan_registry.get_plan(pid)
+                if plan is not None:
+                    plan_dict: dict[str, Any] = await plan.to_dict()
+                    result.append(plan_dict)
+            return result
+        except Exception as e:
+            logger.warning("Failed to list plans: %s", e)
+            return []
+
+    async def _resolve_plan_id(self, plan_id: str) -> str | None:
+        """Resolve a full or prefix plan ID to the full UUID.
+
+        Supports prefix matching: if the given ID is a prefix of exactly
+        one plan's UUID, that plan is returned. This allows users to use
+        the truncated IDs shown by ``/plan list``.
+
+        Args:
+            plan_id: Full UUID or unique prefix.
+
+        Returns:
+            Full plan UUID, or None if not found or ambiguous.
+        """
+        # Try exact match first (fast path)
+        if self._is_plan_id(plan_id):
+            plan = await self.plan_registry.get_plan(plan_id)
+            if plan is not None:
+                return plan_id
+
+        # Ensure all plans are loaded, then prefix match
+        await self.plan_registry.get_all_plans()
+        valid_ids = self._plan_ids()
+        matches = [pid for pid in valid_ids if pid.startswith(plan_id)]
+
+        if len(matches) == 1:
+            return matches[0]
+        if len(matches) > 1:
+            logger.warning(
+                "Ambiguous plan prefix '%s' matches %d plans", plan_id, len(matches)
+            )
+        return None
+
+    async def get_plan(self, plan_id: str) -> dict[str, Any] | None:
+        """Load a plan by ID (or unique prefix), returned as a dict.
+
+        Reads from the saved JSON file directly to preserve depends_on
+        fields that are lost through PlanRegistry's to_dict().
+
+        Args:
+            plan_id: The plan's UUID or a unique prefix.
+
+        Returns:
+            Plan dict or None if not found.
+        """
+        try:
+            resolved_id = await self._resolve_plan_id(plan_id)
+            if resolved_id is None:
+                return None
+
+            # Load directly from disk to preserve depends_on
+            plan_path = self.plans_dir / f"{resolved_id}.json"
+            if plan_path.exists():
+                data: dict[str, Any] = json.loads(plan_path.read_text(encoding="utf-8"))
+                return data
+
+            # Fall back to registry (e.g., in-memory-only plans)
+            plan = await self.plan_registry.get_plan(resolved_id)
+            if plan is None:
+                return None
+            result: dict[str, Any] = await plan.to_dict()
+            return result
+        except Exception as e:
+            logger.warning("Failed to load plan %s: %s", plan_id, e)
+            return None
+
+    async def get_plan_object(self, plan_id: str) -> UniversalPlan | None:
+        """Load a plan by ID (or unique prefix) as a UniversalPlan object.
+
+        Args:
+            plan_id: The plan's UUID or a unique prefix.
+
+        Returns:
+            UniversalPlan or None if not found.
+        """
+        try:
+            resolved_id = await self._resolve_plan_id(plan_id)
+            if resolved_id is None:
+                return None
+            return await self.plan_registry.get_plan(resolved_id)
+        except Exception as e:
+            logger.warning("Failed to load plan %s: %s", plan_id, e)
+            return None
+
+    async def save_plan(self, plan: UniversalPlan) -> str:
+        """Save a UniversalPlan to the registry.
+
+        Args:
+            plan: The plan to save.
+
+        Returns:
+            The plan ID.
+        """
+        plan_id: str = await self.plan_registry.register_plan(plan)
+        return plan_id
+
+    async def save_plan_from_dict(self, plan_dict: dict[str, Any]) -> str:
+        """Build a UniversalPlan from a dict and save it.
+
+        Saves the plan with dependency information preserved. The upstream
+        PlanRegistry's to_dict() drops depends_on fields, so we overwrite
+        the saved JSON with our enriched version that retains them.
+
+        Args:
+            plan_dict: Plan dict with title, steps, etc.
+
+        Returns:
+            The plan ID.
+        """
+        graph = InMemoryGraphStore()
+        plan = await self._build_plan_from_dict(plan_dict, graph)
+        plan_id: str = await self.plan_registry.register_plan(plan)
+
+        # Re-save with depends_on preserved.
+        # PlanRegistry.register_plan -> to_dict() drops depends_on,
+        # so we patch the saved JSON to include the original dependencies.
+        self._patch_saved_plan(plan_id, plan_dict)
+
+        return plan_id
+
+    def _patch_saved_plan(self, plan_id: str, original_dict: dict[str, Any]) -> None:
+        """Patch the saved plan JSON to preserve depends_on from the original dict."""
+        plan_path = self.plans_dir / f"{plan_id}.json"
+        if not plan_path.exists():
+            return
+
+        try:
+            saved: dict[str, Any] = json.loads(plan_path.read_text(encoding="utf-8"))
+            original_steps = original_dict.get("steps", [])
+            saved_steps = saved.get("steps", [])
+
+            # Build mapping: 0-based position → actual saved step index.
+            # The LLM generates 0-based depends_on but PlanRegistry
+            # assigns 1-based string indices ("1", "2", ...).
+            pos_to_index: dict[int, str] = {}
+            for i, step in enumerate(saved_steps):
+                pos_to_index[i] = str(step.get("index", str(i + 1)))
+
+            for i, step in enumerate(saved_steps):
+                if i < len(original_steps):
+                    orig_step = original_steps[i]
+                    if "depends_on" in orig_step:
+                        # Convert 0-based positional refs to actual step indices
+                        converted = []
+                        for dep in orig_step["depends_on"]:
+                            try:
+                                dep_int: int | None = int(dep)
+                            except (TypeError, ValueError):
+                                dep_int = None
+                            if dep_int is not None and dep_int in pos_to_index:
+                                converted.append(pos_to_index[dep_int])
+                            else:
+                                converted.append(str(dep))
+                        step["depends_on"] = converted
+
+            plan_path.write_text(
+                json.dumps(saved, indent=2, default=str),
+                encoding="utf-8",
+            )
+        except Exception as e:
+            logger.warning("Failed to patch plan %s with dependencies: %s", plan_id, e)
+
+    async def delete_plan(self, plan_id: str) -> bool:
+        """Delete a plan by ID (or unique prefix).
+
+        Args:
+            plan_id: The plan's UUID or a unique prefix.
+
+        Returns:
+            True if deleted, False if not found.
+        """
+        try:
+            resolved_id = await self._resolve_plan_id(plan_id)
+            if resolved_id is None:
+                return False
+            deleted: bool = self.plan_registry.delete_plan(resolved_id)
+            return deleted
+        except Exception as e:
+            logger.warning("Failed to delete plan %s: %s", plan_id, e)
+            return False
+
+    async def _build_plan_from_dict(
+        self, plan_dict: dict[str, Any], graph: InMemoryGraphStore
+    ) -> UniversalPlan:
+        """Build a UniversalPlan from a plan dict.
+
+        Args:
+            plan_dict: Dict with title, steps, tool_calls, dependencies.
+            graph: Graph store to build the plan in.
+
+        Returns:
+            Constructed UniversalPlan.
+        """
+        plan = UniversalPlan(
+            title=plan_dict.get("title", "Untitled"),
+            description=plan_dict.get("description"),
+            graph=graph,
+            tags=plan_dict.get("tags"),
+        )
+
+        # Set variables if present
+        for key, value in plan_dict.get("variables", {}).items():
+            plan.set_variable(key, value)
+
+        # Add each step
+        for step in plan_dict.get("steps", []):
+            tool_calls = step.get("tool_calls", [])
+            # Support both tool_calls list and direct tool field
+            if tool_calls:
+                tc = tool_calls[0]
+                tool = tc.get("name", "unknown")
+                args = tc.get("args", {})
+            else:
+                tool = step.get("tool", "unknown")
+                args = step.get("args", {})
+
+            depends_on = step.get("depends_on", [])
+            dep_indices = [str(d) for d in depends_on] if depends_on else []
+
+            await plan.add_tool_step(
+                title=step.get("title", "Untitled step"),
+                tool=tool,
+                args=args,
+                depends_on=dep_indices,
+                result_variable=step.get("result_variable"),
+            )
+
+        return plan
diff --git a/src/mcp_cli/planning/executor.py b/src/mcp_cli/planning/executor.py
new file mode 100644
index 00000000..6d9143d8
--- /dev/null
+++ b/src/mcp_cli/planning/executor.py
@@ -0,0 +1,1146 @@
+# src/mcp_cli/planning/executor.py
+"""PlanRunner — orchestrates LLM-driven plan execution.
+
+Executes plans step-by-step with:
+- LLM-driven tool call generation with tool schemas
+- Automatic retry on failure with LLM error correction
+- Parallel batch execution for independent steps (topological batching)
+- Progress callbacks for terminal/dashboard display
+- Dry-run mode (trace without executing)
+- Execution checkpointing and resume
+- DAG visualization
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import re
+import time
+from collections.abc import Callable, Coroutine
+from typing import Any, Protocol, Union, runtime_checkable
+
+from pydantic import BaseModel, Field
+
+from chuk_ai_planner.execution.models import ToolExecutionRequest
+
+from mcp_cli.chat.models import MessageRole
+from mcp_cli.config.defaults import (
+    DEFAULT_PLAN_CHECKPOINT_MAX_CHARS,
+    DEFAULT_PLAN_DAG_TITLE_MAX_CHARS,
+    DEFAULT_PLAN_MAX_CONCURRENCY,
+    DEFAULT_PLAN_MAX_STEP_RETRIES,
+    DEFAULT_PLAN_VARIABLE_SUMMARY_MAX_CHARS,
+)
+from mcp_cli.config.enums import PlanStatus
+from mcp_cli.planning.backends import McpToolBackend
+from mcp_cli.planning.context import PlanningContext
+
+logger = logging.getLogger(__name__)
+
+
+@runtime_checkable
+class ModelManagerProtocol(Protocol):
+    """Minimal interface for model manager used by PlanRunner."""
+
+    def get_client(
+        self,
+        provider: str | None = None,
+        model: str | None = None,
+    ) -> Any: ...
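The "parallel batch execution" the executor advertises is plain Kahn-style BFS layering over the `depends_on` edges. As a rough standalone sketch of that idea (this `compute_batches` is a hypothetical illustration assuming steps are dicts carrying `index` and `depends_on` keys; the in-tree `_compute_batches` may differ in details):

```python
from collections import defaultdict
from typing import Any


def compute_batches(steps: list[dict[str, Any]]) -> list[list[dict[str, Any]]]:
    """Group plan steps into batches of mutually independent steps.

    Kahn-style BFS: steps with no unsatisfied dependencies form a batch;
    executing a batch unlocks its dependents for the next batch.
    """
    by_index = {s["index"]: s for s in steps}
    indegree = {s["index"]: 0 for s in steps}
    dependents: dict[str, list[str]] = defaultdict(list)

    for s in steps:
        for dep in s.get("depends_on", []):
            if dep in by_index:  # ignore dangling references
                indegree[s["index"]] += 1
                dependents[dep].append(s["index"])

    ready = [i for i, d in indegree.items() if d == 0]
    batches: list[list[dict[str, Any]]] = []
    while ready:
        batches.append([by_index[i] for i in ready])
        next_ready: list[str] = []
        for i in ready:
            for child in dependents[i]:
                indegree[child] -= 1
                if indegree[child] == 0:
                    next_ready.append(child)
        ready = next_ready
    return batches
```

Each returned batch only depends on steps in earlier batches, so a runner can `asyncio.gather` the steps of one batch (bounded by `max_concurrency`) before moving to the next; a cycle in `depends_on` simply leaves those steps out of every batch, which a caller could detect by comparing counts.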
+
+
+# Callback type aliases (sync or async)
+ToolStartCallback = Callable[
+    [str, dict[str, Any]],
+    Union[None, Coroutine[Any, Any, None]],
+]
+"""Called before each tool execution: (tool_name, arguments) -> None."""
+
+ToolCompleteCallback = Callable[
+    [str, str, bool, float],
+    Union[None, Coroutine[Any, Any, None]],
+]
+"""Called after each tool execution: (tool_name, result_text, success, elapsed) -> None."""
+
+
+class StepResult(BaseModel):
+    """Result of a single plan step execution."""
+
+    step_index: str
+    step_title: str
+    tool_name: str
+    success: bool
+    result: Any = None
+    error: str | None = None
+    duration: float = 0.0
+
+    model_config = {"arbitrary_types_allowed": True}
+
+
+class PlanExecutionResult(BaseModel):
+    """Result of executing an entire plan."""
+
+    plan_id: str
+    plan_title: str
+    success: bool
+    steps: list[StepResult] = Field(default_factory=list)
+    variables: dict[str, Any] = Field(default_factory=dict)
+    total_duration: float = 0.0
+    error: str | None = None
+    replanned: bool = False
+
+    model_config = {"arbitrary_types_allowed": True}
+
+
+class PlanRunner:
+    """Orchestrates LLM-driven plan execution with mcp-cli integration.
+
+    Each step is executed by the LLM: given the step description and tool
+    schemas, the LLM generates the actual tool call (with correct parameter
+    names and values). If a tool fails, the error is fed back to the LLM,
+    which can retry with corrected arguments.
+
+    Features:
+    - LLM-driven tool call generation with tool schemas
+    - Automatic retry on failure with LLM error correction
+    - Parallel batch execution for independent steps (topological batching)
+    - Progress callbacks for terminal/dashboard display
+    - Dry-run mode (trace without executing)
+    - Execution checkpointing and resume
+    - DAG visualization
+    """
+
+    def __init__(
+        self,
+        context: PlanningContext,
+        *,
+        model_manager: ModelManagerProtocol | None = None,
+        on_step_start: Callable[[str, str, str], None] | None = None,
+        on_step_complete: Callable[[StepResult], None] | None = None,
+        on_tool_start: ToolStartCallback | None = None,
+        on_tool_complete: ToolCompleteCallback | None = None,
+        enable_guards: bool = False,
+        max_concurrency: int = DEFAULT_PLAN_MAX_CONCURRENCY,
+        max_step_retries: int = DEFAULT_PLAN_MAX_STEP_RETRIES,
+    ) -> None:
+        """Initialize the plan runner.
+
+        Args:
+            context: PlanningContext with tool_manager and graph_store.
+            model_manager: ModelManager for LLM-driven step execution.
+                When provided, the LLM generates tool calls from step
+                descriptions and can retry on failure with error feedback.
+            on_step_start: Callback(step_index, step_title, tool_name) before each step.
+            on_step_complete: Callback(StepResult) after each step.
+            on_tool_start: Async callback(tool_name, arguments) before each tool call
+                within a step. Called for each agentic loop tool invocation.
+            on_tool_complete: Async callback(tool_name, result_str, success, elapsed)
+                after each tool call. Called for each agentic loop tool invocation.
+            enable_guards: If True, enforce guard checks during execution.
+            max_concurrency: Maximum concurrent steps within a batch.
+            max_step_retries: Maximum LLM retry attempts per step on failure.
+        """
+        self.context = context
+        self._model_manager = model_manager
+        self._on_step_start = on_step_start
+        self._on_step_complete = on_step_complete
+        self._on_tool_start = on_tool_start
+        self._on_tool_complete = on_tool_complete
+        self._max_concurrency = max_concurrency
+        self._max_step_retries = max_step_retries
+
+        # Create the MCP tool backend with guard integration
+        self._backend = McpToolBackend(
+            context.tool_manager,
+            enable_guards=enable_guards,
+        )
+
+        # Tool catalog cache (lazy-loaded, protected by lock for parallel steps)
+        self._tool_catalog: list[dict[str, Any]] | None = None
+        self._tool_catalog_lock = asyncio.Lock()
+
+    async def execute_plan(
+        self,
+        plan_data: dict[str, Any],
+        *,
+        variables: dict[str, Any] | None = None,
+        dry_run: bool = False,
+        checkpoint: bool = True,
+    ) -> PlanExecutionResult:
+        """Execute a plan with parallel batch execution.
+
+        Steps are grouped into topological batches. Steps within a batch
+        have no dependencies on each other and run concurrently. Batches
+        execute sequentially to respect the dependency DAG.
+
+        Args:
+            plan_data: Plan dict (from PlanRegistry or plan generation).
+            variables: Optional variable overrides for parameterized plans.
+            dry_run: If True, trace without executing tools.
+            checkpoint: If True, persist state after each batch.
+
+        Returns:
+            PlanExecutionResult with step results and final variables.
+        """
+        start_time = time.perf_counter()
+        plan_id = plan_data.get("id", "unknown")
+        plan_title = plan_data.get("title", "Untitled Plan")
+
+        logger.info("Executing plan: %s (%s)", plan_title, plan_id)
+
+        if dry_run:
+            return await self._dry_run(plan_data, variables)
+
+        try:
+            # Build variable context
+            var_context = dict(plan_data.get("variables", {}))
+            if variables:
+                var_context.update(variables)
+
+            steps = plan_data.get("steps", [])
+            if not steps:
+                return PlanExecutionResult(
+                    plan_id=plan_id,
+                    plan_title=plan_title,
+                    success=True,
+                    total_duration=time.perf_counter() - start_time,
+                )
+
+            # Compute topological batches
+            batches = _compute_batches(steps)
+            logger.info(
+                "Plan %s: %d steps in %d batches",
+                plan_id,
+                len(steps),
+                len(batches),
+            )
+
+            all_step_results: list[StepResult] = []
+            completed_indices: list[str] = []
+
+            for batch_num, batch in enumerate(batches, 1):
+                logger.debug(
+                    "Batch %d/%d: %d steps",
+                    batch_num,
+                    len(batches),
+                    len(batch),
+                )
+
+                if len(batch) == 1:
+                    # Single step — execute directly (no gather overhead)
+                    step = batch[0]
+                    result = await self._execute_step(step, var_context)
+                    all_step_results.append(result)
+
+                    if result.success:
+                        completed_indices.append(result.step_index)
+                    else:
+                        if checkpoint:
+                            self._save_checkpoint(
+                                plan_id,
+                                completed_steps=completed_indices,
+                                variables=var_context,
+                                status=PlanStatus.FAILED,
+                            )
+                        return PlanExecutionResult(
+                            plan_id=plan_id,
+                            plan_title=plan_title,
+                            success=False,
+                            steps=all_step_results,
+                            variables=var_context,
+                            total_duration=time.perf_counter() - start_time,
+                            error=f"Step {result.step_index} failed: {result.error}",
+                        )
+                else:
+                    # Multiple independent steps — execute concurrently
+                    batch_results = await self._execute_batch(batch, var_context)
+                    all_step_results.extend(batch_results)
+
+                    failed = [r for r in batch_results if not r.success]
+                    if failed:
+                        completed_indices.extend(
+                            r.step_index for r in batch_results if r.success
+                        )
+                        if checkpoint:
+                            self._save_checkpoint(
+                                plan_id,
+                                completed_steps=completed_indices,
+                                variables=var_context,
+                                status=PlanStatus.FAILED,
+                            )
+                        fail_msgs = "; ".join(
+                            f"step {r.step_index}: {r.error}" for r in failed
+                        )
+                        return PlanExecutionResult(
+                            plan_id=plan_id,
+                            plan_title=plan_title,
+                            success=False,
+                            steps=all_step_results,
+                            variables=var_context,
+                            total_duration=time.perf_counter() - start_time,
+                            error=f"Batch {batch_num} had failures: {fail_msgs}",
+                        )
+
+                    completed_indices.extend(r.step_index for r in batch_results)
+
+                # Checkpoint after each batch
+                if checkpoint:
+                    self._save_checkpoint(
+                        plan_id,
+                        completed_steps=completed_indices,
+                        variables=var_context,
+                        status=PlanStatus.RUNNING,
+                    )
+
+            total_duration = time.perf_counter() - start_time
+
+            # Final checkpoint
+            if checkpoint:
+                self._save_checkpoint(
+                    plan_id,
+                    completed_steps=completed_indices,
+                    variables=var_context,
+                    status=PlanStatus.COMPLETED,
+                )
+
+            return PlanExecutionResult(
+                plan_id=plan_id,
+                plan_title=plan_title,
+                success=True,
+                steps=all_step_results,
+                variables=var_context,
+                total_duration=total_duration,
+            )
+
+        except Exception as e:
+            total_duration = time.perf_counter() - start_time
+            logger.error("Plan execution failed: %s", e)
+            return PlanExecutionResult(
+                plan_id=plan_id,
+                plan_title=plan_title,
+                success=False,
+                total_duration=total_duration,
+                error=str(e),
+            )
+
+    async def _get_tool_catalog(self) -> list[dict[str, Any]]:
+        """Get tool catalog, caching for the duration of the plan run.
+
+        Uses an asyncio.Lock to prevent duplicate fetches when parallel
+        batch steps hit the cache simultaneously.
+        """
+        async with self._tool_catalog_lock:
+            if self._tool_catalog is None:
+                self._tool_catalog = await self.context.get_tool_catalog()
+            return self._tool_catalog
+
+    async def _execute_step(
+        self,
+        step: dict[str, Any],
+        var_context: dict[str, Any],
+    ) -> StepResult:
+        """Execute a single plan step using the LLM for tool call generation.
+
+        When a model_manager is available, the LLM generates the tool call
+        from the step description and tool schemas, then the tool is executed.
+        If the tool fails, the error is fed back to the LLM for retry.
+
+        Falls back to static arg execution when no model_manager is provided.
+        """
+        step_index = step.get("index", "?")
+        step_title = step.get("title", "Untitled")
+        tool_calls = step.get("tool_calls", [])
+        hint_tool = tool_calls[0]["name"] if tool_calls else step.get("tool", "none")
+        hint_args = (
+            tool_calls[0].get("args", {}) if tool_calls else step.get("args", {})
+        )
+
+        if self._on_step_start:
+            self._on_step_start(step_index, step_title, hint_tool)
+
+        start_time = time.perf_counter()
+
+        # LLM-driven execution (agentic loop with retry)
+        if self._model_manager:
+            logger.info(
+                "Step %s: using agentic LLM execution (model_manager=%s)",
+                step_index,
+                type(self._model_manager).__name__,
+            )
+            step_result = await self._execute_step_with_llm(
+                step, var_context, step_index, step_title, hint_tool, hint_args
+            )
+        else:
+            logger.info(
+                "Step %s: using static arg execution (no model_manager)", step_index
+            )
+            step_result = await self._execute_step_static(
+                step, var_context, step_index, step_title, hint_tool, hint_args
+            )
+
+        step_result.duration = time.perf_counter() - start_time
+
+        if self._on_step_complete:
+            self._on_step_complete(step_result)
+
+        return step_result
+
+    async def _execute_step_static(
+        self,
+        step: dict[str, Any],
+        var_context: dict[str, Any],
+        step_index: str,
+        step_title: str,
+        tool_name: str,
+        args: dict[str, Any],
+    ) -> StepResult:
+        """Execute a step using the plan's static arguments (no LLM)."""
+        resolved_args = _resolve_variables(args, var_context)
+
+        try:
+            if self._on_tool_start:
+                await _maybe_await(self._on_tool_start(tool_name, resolved_args))
+
+            tool_start = time.perf_counter()
+            request = ToolExecutionRequest(
+                tool_name=tool_name,
+                args=resolved_args,
+                step_id=f"step-{step_index}",
+            )
+            exec_result = await self._backend.execute_tool(request)
+            tool_elapsed = time.perf_counter() - tool_start
+
+            result_var = step.get("result_variable")
+            if result_var and exec_result.success:
+                var_context[result_var] = exec_result.result
+
+            result_text = (
+                json.dumps(exec_result.result, default=str)
+                if exec_result.success
+                else (exec_result.error or "Tool execution failed")
+            )
+            if self._on_tool_complete:
+                await _maybe_await(
+                    self._on_tool_complete(
+                        tool_name, result_text, exec_result.success, tool_elapsed
+                    )
+                )
+
+            return StepResult(
+                step_index=step_index,
+                step_title=step_title,
+                tool_name=tool_name,
+                success=exec_result.success,
+                result=exec_result.result,
+                error=exec_result.error,
+            )
+        except Exception as e:
+            return StepResult(
+                step_index=step_index,
+                step_title=step_title,
+                tool_name=tool_name,
+                success=False,
+                error=str(e),
+            )
+
+    async def _execute_step_with_llm(
+        self,
+        step: dict[str, Any],
+        var_context: dict[str, Any],
+        step_index: str,
+        step_title: str,
+        hint_tool: str,
+        hint_args: dict[str, Any],
+    ) -> StepResult:
+        """Execute a step via a full agentic loop.
+
+        The LLM drives the entire step execution:
+        1. Sees the step description, tool schemas, and variable context
+        2. Generates a tool call with correct parameters
+        3. Sees the tool result (success OR failure)
+        4. Evaluates the result — was it useful? Empty? Wrong?
+        5. Decides: respond with final answer (step done) or call another tool
+
+        The loop continues until the LLM responds with text (no tool call),
+        indicating it considers the step complete, or max turns are exhausted.
+        """
+        tool_catalog = await self._get_tool_catalog()
+        if self._model_manager is None:
+            raise RuntimeError(
+                "_execute_step_with_llm requires model_manager but none was provided"
+            )
+        client = self._model_manager.get_client()
+
+        # Build variable summary for context
+        var_summary = _summarize_variables(var_context) if var_context else "none"
+
+        # Resolve any ${var} references in hint args for context
+        resolved_hints = _resolve_variables(hint_args, var_context)
+        hint_str = json.dumps(resolved_hints, default=str) if resolved_hints else "{}"
+
+        result_var = step.get("result_variable")
+
+        # Initial messages: instruct the LLM to act as an agent for this step
+        messages: list[dict[str, Any]] = [
+            {
+                "role": MessageRole.SYSTEM,
+                "content": (
+                    "You are executing one step of a plan. Your job is to call "
+                    "the appropriate tool(s) to accomplish the step goal.\n\n"
+                    "Rules:\n"
+                    "- Use the tool schemas to determine correct parameter names and types\n"
+                    "- After each tool call, you will see the result\n"
+                    "- If the result is empty, null, or unhelpful, try again with "
+                    "different parameters (e.g. a simpler location name)\n"
+                    "- If the tool returns an error, fix the parameters and retry\n"
+                    "- When you have a satisfactory result, respond with a brief "
+                    "text summary (no tool call) to complete the step\n"
+                    "- Ensure numeric parameters are numbers, not strings\n\n"
+                    f"Available variables from previous steps:\n{var_summary}"
+                ),
+            },
+            {
+                "role": MessageRole.USER,
+                "content": (
+                    f"Execute this plan step:\n"
+                    f"  Step {step_index}: {step_title}\n"
+                    f"  Suggested tool: {hint_tool}\n"
+                    f"  Suggested args: {hint_str}\n\n"
+                    f"Call the tool now."
+ ), + }, + ] + + last_error: str | None = None + last_result: Any = None + used_tool: str = hint_tool + max_turns = 1 + self._max_step_retries + + for turn in range(max_turns): + try: + response = await client.create_completion( + messages=messages, + tools=tool_catalog, + stream=False, + ) + + tool_call = _extract_tool_call(response) + + if not tool_call: + # LLM responded with text (no tool call). + # On turn 0, it means the LLM chose not to call a tool — fall back. + if turn == 0: + logger.debug( + "Step %s: LLM did not generate tool call, " + "using static args", + step_index, + ) + return await self._execute_step_static( + step, + var_context, + step_index, + step_title, + hint_tool, + hint_args, + ) + + # On later turns, the LLM is signaling step completion. + # Use the last successful result as the step output. + if last_result is not None: + if result_var: + var_context[result_var] = last_result + return StepResult( + step_index=step_index, + step_title=step_title, + tool_name=used_tool, + success=True, + result=last_result, + ) + # No successful result yet — treat as failure + return StepResult( + step_index=step_index, + step_title=step_title, + tool_name=used_tool, + success=False, + error=last_error or "LLM ended step without a result", + ) + + # LLM generated a tool call — execute it + used_tool = tool_call["name"] + tool_args = tool_call["args"] + call_id = f"call_{step_index}_{turn}" + + # Notify UI: tool execution starting + if self._on_tool_start: + await _maybe_await(self._on_tool_start(used_tool, tool_args)) + + tool_start = time.perf_counter() + request = ToolExecutionRequest( + tool_name=used_tool, + args=tool_args, + step_id=f"step-{step_index}", + ) + exec_result = await self._backend.execute_tool(request) + tool_elapsed = time.perf_counter() - tool_start + + # Build the assistant + tool result messages for the conversation + messages.append( + { + "role": MessageRole.ASSISTANT, + "content": None, + "tool_calls": [ + { + "id": call_id, + 
"type": "function", + "function": { + "name": used_tool, + "arguments": json.dumps(tool_args, default=str), + }, + } + ], + } + ) + + if exec_result.success: + last_result = exec_result.result + result_text = json.dumps(exec_result.result, default=str) + messages.append( + { + "role": MessageRole.TOOL, + "tool_call_id": call_id, + "content": result_text, + } + ) + # Notify UI: tool execution completed + if self._on_tool_complete: + await _maybe_await( + self._on_tool_complete( + used_tool, result_text, True, tool_elapsed + ) + ) + logger.debug( + "Step %s turn %d: tool %s succeeded in %.2fs", + step_index, + turn + 1, + used_tool, + tool_elapsed, + ) + else: + last_error = exec_result.error or "Tool execution failed" + messages.append( + { + "role": MessageRole.TOOL, + "tool_call_id": call_id, + "content": f"Error: {last_error}", + } + ) + # Notify UI: tool execution failed + if self._on_tool_complete: + await _maybe_await( + self._on_tool_complete( + used_tool, last_error, False, tool_elapsed + ) + ) + logger.info( + "Step %s turn %d: tool %s failed in %.2fs: %s", + step_index, + turn + 1, + used_tool, + tool_elapsed, + last_error, + ) + + except Exception as e: + last_error = str(e) + logger.warning( + "Step %s turn %d raised exception: %s", + step_index, + turn + 1, + last_error, + ) + + # Max turns exhausted — return whatever we have + if last_result is not None: + if result_var: + var_context[result_var] = last_result + return StepResult( + step_index=step_index, + step_title=step_title, + tool_name=used_tool, + success=True, + result=last_result, + ) + return StepResult( + step_index=step_index, + step_title=step_title, + tool_name=used_tool, + success=False, + error=last_error or "All retry attempts failed", + ) + + async def _execute_batch( + self, + batch: list[dict[str, Any]], + var_context: dict[str, Any], + ) -> list[StepResult]: + """Execute a batch of independent steps concurrently. + + Uses asyncio.Semaphore to limit concurrency. 
+ """ + sem = asyncio.Semaphore(self._max_concurrency) + + async def _run_with_sem(step: dict[str, Any]) -> StepResult: + async with sem: + return await self._execute_step(step, var_context) + + tasks = [asyncio.create_task(_run_with_sem(step)) for step in batch] + results = await asyncio.gather(*tasks, return_exceptions=True) + + step_results: list[StepResult] = [] + for i, result in enumerate(results): + if isinstance(result, BaseException): + step = batch[i] + step_results.append( + StepResult( + step_index=step.get("index", "?"), + step_title=step.get("title", "Untitled"), + tool_name=step.get("tool", "none"), + success=False, + error=str(result), + ) + ) + else: + step_results.append(result) + return step_results + + async def _dry_run( + self, + plan_data: dict[str, Any], + variables: dict[str, Any] | None = None, + ) -> PlanExecutionResult: + """Trace plan execution without running tools. + + Shows what each step would do, including resolved variable references + and which steps run in parallel batches. 
+ """ + plan_id = plan_data.get("id", "unknown") + plan_title = plan_data.get("title", "Untitled Plan") + step_results = [] + var_context = dict(plan_data.get("variables", {})) + if variables: + var_context.update(variables) + + steps = plan_data.get("steps", []) + batches = _compute_batches(steps) + + for batch in batches: + for step in batch: + step_index = step.get("index", "?") + step_title = step.get("title", "Untitled") + tool_calls = step.get("tool_calls", []) + tool_name = ( + tool_calls[0]["name"] if tool_calls else step.get("tool", "none") + ) + + if self._on_step_start: + self._on_step_start(step_index, step_title, tool_name) + + step_result = StepResult( + step_index=step_index, + step_title=step_title, + tool_name=tool_name, + success=True, + result="[dry-run: not executed]", + ) + step_results.append(step_result) + + # Simulate variable binding + result_var = step.get("result_variable") + if result_var: + var_context[result_var] = f"<{tool_name} result>" + + if self._on_step_complete: + self._on_step_complete(step_result) + + return PlanExecutionResult( + plan_id=plan_id, + plan_title=plan_title, + success=True, + steps=step_results, + variables=var_context, + ) + + def _save_checkpoint( + self, + plan_id: str, + completed_steps: list[str], + variables: dict[str, Any], + status: PlanStatus, + ) -> None: + """Save execution checkpoint for resume support.""" + checkpoint_path = self.context.plans_dir / f"{plan_id}_state.json" + checkpoint = { + "plan_id": plan_id, + "status": status, + "completed_steps": completed_steps, + "variables": _serialize_variables(variables), + } + + try: + checkpoint_path.write_text( + json.dumps(checkpoint, indent=2, default=str), + encoding="utf-8", + ) + logger.debug("Saved checkpoint for plan %s: %s", plan_id, status) + except Exception as e: + logger.warning("Failed to save checkpoint for plan %s: %s", plan_id, e) + + def load_checkpoint(self, plan_id: str) -> dict[str, Any] | None: + """Load execution checkpoint for 
resume.""" + checkpoint_path = self.context.plans_dir / f"{plan_id}_state.json" + if not checkpoint_path.exists(): + return None + + try: + data: dict[str, Any] = json.loads( + checkpoint_path.read_text(encoding="utf-8") + ) + return data + except Exception as e: + logger.warning("Failed to load checkpoint for plan %s: %s", plan_id, e) + return None + + +# ── Topological Batching ─────────────────────────────────────────────────── + + +def _compute_batches(steps: list[dict[str, Any]]) -> list[list[dict[str, Any]]]: + """Compute parallel execution batches via topological sort. + + Groups steps into batches where all steps in a batch have their + dependencies satisfied by previous batches. Steps within a batch + can execute concurrently. + + Uses Kahn's BFS algorithm for topological sorting. + + Args: + steps: List of step dicts with 'index' and 'depends_on' fields. + + Returns: + List of batches, each batch is a list of step dicts. + """ + if not steps: + return [] + + # Build index maps + index_to_step: dict[str, dict[str, Any]] = {} + for i, step in enumerate(steps): + idx = str(step.get("index", str(i + 1))) + step = dict(step) # Don't mutate original + step["index"] = idx + index_to_step[idx] = step + + # Build dependency graph + in_degree: dict[str, int] = {idx: 0 for idx in index_to_step} + dependents: dict[str, list[str]] = {idx: [] for idx in index_to_step} + + for idx, step in index_to_step.items(): + deps = step.get("depends_on", []) + for dep in deps: + dep_str = str(dep) + if dep_str in index_to_step: + in_degree[idx] += 1 + dependents[dep_str].append(idx) + + # Kahn's BFS: find all ready nodes (in_degree == 0), emit as batch + batches = [] + remaining = set(index_to_step.keys()) + + while remaining: + # Find all nodes with no unmet dependencies + ready = [idx for idx in remaining if in_degree.get(idx, 0) == 0] + + if not ready: + # Cycle detected — break tie by taking first remaining node + logger.warning("Dependency cycle detected, forcing execution 
order") + ready = [sorted(remaining)[0]] + + batch = [index_to_step[idx] for idx in sorted(ready)] + batches.append(batch) + + # Remove processed nodes and update dependents + for idx in ready: + remaining.discard(idx) + for dep_idx in dependents.get(idx, []): + in_degree[dep_idx] = max(0, in_degree[dep_idx] - 1) + + return batches + + +# ── Variable Resolution ─────────────────────────────────────────────────── + + +def _resolve_variables( + args: dict[str, Any], variables: dict[str, Any] +) -> dict[str, Any]: + """Resolve ${var} references in tool arguments. + + Supports: + - ${variable} — direct replacement + - ${variable.field} — nested dict access + - Template strings: "prefix ${var} suffix" + + Args: + args: Tool arguments dict (may contain ${var} references). + variables: Current variable bindings. + + Returns: + New dict with all resolvable references replaced. + """ + resolved = {} + for key, value in args.items(): + resolved[key] = _resolve_value(value, variables) + return resolved + + +def _resolve_value(value: Any, variables: dict[str, Any]) -> Any: + """Resolve a single value, recursing into dicts and lists.""" + if isinstance(value, str): + return _resolve_string(value, variables) + if isinstance(value, dict): + return {k: _resolve_value(v, variables) for k, v in value.items()} + if isinstance(value, list): + return [_resolve_value(v, variables) for v in value] + return value + + +def _resolve_string(value: str, variables: dict[str, Any]) -> Any: + """Resolve ${var} references in a string value.""" + if not value or "${" not in value: + return value + + # Single variable reference: "${var}" → return the value directly (preserves type) + if value.startswith("${") and value.endswith("}") and value.count("${") == 1: + var_path = value[2:-1] + resolved = _resolve_path(var_path, variables) + return resolved if resolved is not None else value + + # Template string: "text ${var} more" → string interpolation + def replacer(match: re.Match) -> str: + var_path 
= match.group(1) + resolved = _resolve_path(var_path, variables) + return str(resolved) if resolved is not None else match.group(0) + + return re.sub(r"\$\{([^}]+)}", replacer, value) + + +def _resolve_path(var_path: str, variables: dict[str, Any]) -> Any: + """Resolve a dotted variable path like 'api.endpoint.port'.""" + parts = var_path.split(".") + current: Any = variables + for part in parts: + if isinstance(current, dict) and part in current: + current = current[part] + else: + return None + return current + + +# ── Callback Helpers ────────────────────────────────────────────────────── + + +async def _maybe_await(result: Any) -> Any: + """Await a result if it's a coroutine, otherwise return it directly. + + Allows callbacks to be either sync or async functions. + """ + if asyncio.iscoroutine(result): + return await result + return result + + +# ── LLM Response Helpers ────────────────────────────────────────────────── + + +def _extract_tool_call(response: Any) -> dict[str, Any] | None: + """Extract a tool call from an LLM completion response. + + Handles three response formats: + 1. chuk_llm native: {"response": ..., "tool_calls": [...], "usage": {...}} + 2. OpenAI-style dict: {"choices": [{"message": {"tool_calls": [...]}}]} + 3. Object-style: response.choices[0].message.tool_calls + + Returns: + Dict with 'name' and 'args', or None if no tool call found. 
+ """ + if response is None: + return None + + if isinstance(response, dict): + # chuk_llm native format: top-level "tool_calls" key + tool_calls = response.get("tool_calls") + if tool_calls: + return _parse_tool_call_entry(tool_calls[0]) + + # OpenAI-style dict format: choices[0].message.tool_calls + choices = response.get("choices", []) + if choices: + message = choices[0].get("message", {}) + tool_calls = message.get("tool_calls", []) + if tool_calls: + return _parse_tool_call_entry(tool_calls[0]) + + return None + + # Object-style response (e.g., Pydantic models) + # Check for top-level tool_calls attribute (chuk_llm objects) + if hasattr(response, "tool_calls") and response.tool_calls: + return _parse_tool_call_entry(response.tool_calls[0]) + + # OpenAI-style object: response.choices[0].message.tool_calls + if hasattr(response, "choices") and response.choices: + choice = response.choices[0] + message = getattr(choice, "message", None) + if message: + tool_calls = getattr(message, "tool_calls", None) + if tool_calls: + return _parse_tool_call_entry(tool_calls[0]) + + return None + + +def _parse_tool_call_entry(tc: Any) -> dict[str, Any] | None: + """Parse a single tool call entry from either dict or object format. + + Args: + tc: A tool call entry (dict or object with function attribute). + + Returns: + Dict with 'name' and 'args', or None if parsing fails. 
+ """ + if isinstance(tc, dict): + func = tc.get("function", {}) + name = func.get("name", "") + args_str = func.get("arguments", "{}") + elif hasattr(tc, "function"): + func = tc.function + name = getattr(func, "name", "") + args_str = getattr(func, "arguments", "{}") + else: + return None + + try: + args = json.loads(args_str) if isinstance(args_str, str) else args_str + except (json.JSONDecodeError, TypeError): + args = {} + + return {"name": name, "args": args} if name else None + + +def _summarize_variables(var_context: dict[str, Any]) -> str: + """Build a compact summary of current variables for the LLM.""" + if not var_context: + return "none" + lines = [] + for key, value in var_context.items(): + text = json.dumps(value, default=str) + if len(text) > DEFAULT_PLAN_VARIABLE_SUMMARY_MAX_CHARS: + text = text[:DEFAULT_PLAN_VARIABLE_SUMMARY_MAX_CHARS] + "..." + lines.append(f" ${{{key}}} = {text}") + return "\n".join(lines) + + +# ── DAG Visualization ────────────────────────────────────────────────────── + + +def render_plan_dag(plan_data: dict[str, Any]) -> str: + """Render a plan as an ASCII DAG for terminal display. + + Shows steps with their tools, dependencies, and execution status. + Parallel steps (same batch) are shown with a parallel indicator. + + Args: + plan_data: Plan dict with steps and dependencies. + + Returns: + Multiline string with the DAG visualization. 
+ """ + steps = plan_data.get("steps", []) + if not steps: + return " (empty plan)" + + # Compute batches for parallel indicators + batches = _compute_batches(steps) + step_to_batch: dict[str, int] = {} + for batch_num, batch in enumerate(batches, 1): + for step in batch: + step_to_batch[step.get("index", "?")] = batch_num + + lines = [] + current_batch = 0 + + for i, step in enumerate(steps): + index = step.get("index", str(i + 1)) + title = step.get("title", "Untitled")[:DEFAULT_PLAN_DAG_TITLE_MAX_CHARS] + tool_calls = step.get("tool_calls", []) + tool_name = tool_calls[0]["name"] if tool_calls else step.get("tool", "?") + depends_on = step.get("depends_on", []) + + # Status indicator + status = step.get("_status", PlanStatus.PENDING) + status_char = { + PlanStatus.PENDING: "○", + PlanStatus.RUNNING: "◉", + PlanStatus.COMPLETED: "●", + PlanStatus.FAILED: "✗", + }.get(status, "○") + + # Batch separator for parallel groups + batch_num = step_to_batch.get(str(index), 0) + if batch_num != current_batch: + if current_batch > 0: + lines.append("") # Blank line between batches + current_batch = batch_num + + # Dependency arrows + dep_str = "" + if depends_on: + dep_refs = ", ".join(str(d) for d in depends_on) + dep_str = f" ← after: {dep_refs}" + + # Parallel indicator + batch_steps = batches[batch_num - 1] if batch_num > 0 else [] + parallel_marker = "" + if len(batch_steps) > 1: + parallel_marker = " ∥" + + lines.append( + f" {status_char} {index}. {title:<35} [{tool_name}]{dep_str}{parallel_marker}" + ) + + return "\n".join(lines) + + +# ── Serialization Helpers ────────────────────────────────────────────────── + + +def _serialize_variables(variables: dict[str, Any]) -> dict[str, Any]: + """Make variables JSON-serializable. + + Truncates large values to prevent bloated checkpoint files. 
+ """ + result: dict[str, Any] = {} + for key, value in variables.items(): + if isinstance(value, str) and len(value) > DEFAULT_PLAN_CHECKPOINT_MAX_CHARS: + result[key] = value[:DEFAULT_PLAN_CHECKPOINT_MAX_CHARS] + "... [truncated]" + elif isinstance(value, (dict, list)): + serialized = json.dumps(value, default=str) + if len(serialized) > DEFAULT_PLAN_CHECKPOINT_MAX_CHARS: + result[key] = f"[{type(value).__name__}, {len(serialized)} chars]" + else: + result[key] = value + else: + result[key] = value + return result diff --git a/src/mcp_cli/planning/tools.py b/src/mcp_cli/planning/tools.py new file mode 100644 index 00000000..7b9937a0 --- /dev/null +++ b/src/mcp_cli/planning/tools.py @@ -0,0 +1,408 @@ +# src/mcp_cli/planning/tools.py +"""Plan tool definitions and handler for LLM tool interception. + +Provides internal tools that the model can call to autonomously create +and execute plans during conversation. These tools are intercepted in +tool_processor.py before MCP routing (same pattern as VM and memory tools). + +Tools: +- plan_create: Generate a plan from a goal description +- plan_execute: Execute a previously created plan +- plan_create_and_execute: Generate and execute in one call (common case) +""" + +from __future__ import annotations + +import json +import logging +from typing import Any + +from mcp_cli.planning.context import PlanningContext + +logger = logging.getLogger(__name__) + +# Tool names for interception in tool_processor.py +_PLAN_TOOL_NAMES = frozenset({"plan_create", "plan_execute", "plan_create_and_execute"}) + + +def get_plan_tools_as_dicts() -> list[dict[str, Any]]: + """Return OpenAI-format tool definitions for plan tools.""" + return [ + { + "type": "function", + "function": { + "name": "plan_create_and_execute", + "description": ( + "Create and execute a multi-step plan to accomplish a complex goal. 
" + "Use this when a task requires multiple tool calls that depend on each other " + "(e.g., geocode a location then get weather for those coordinates). " + "The plan is generated from your goal description, then executed automatically. " + "Results from all steps are returned." + ), + "parameters": { + "type": "object", + "properties": { + "goal": { + "type": "string", + "description": ( + "Natural language description of what to accomplish. " + "Be specific about the end result you want." + ), + }, + }, + "required": ["goal"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "plan_create", + "description": ( + "Generate a multi-step execution plan from a goal description " + "without executing it. Returns the plan ID and step details. " + "Use plan_execute to run it later, or plan_create_and_execute " + "to do both at once." + ), + "parameters": { + "type": "object", + "properties": { + "goal": { + "type": "string", + "description": "Natural language description of the goal.", + }, + }, + "required": ["goal"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "plan_execute", + "description": ( + "Execute a previously created plan by its ID. " + "Returns the results from all executed steps." + ), + "parameters": { + "type": "object", + "properties": { + "plan_id": { + "type": "string", + "description": "The plan ID returned by plan_create.", + }, + }, + "required": ["plan_id"], + }, + }, + }, + ] + + +async def handle_plan_tool( + tool_name: str, + arguments: dict[str, Any], + planning_context: PlanningContext, + model_manager: Any | None = None, + ui_manager: Any | None = None, +) -> str: + """Execute a plan tool and return the result as a JSON string. + + Args: + tool_name: One of plan_create, plan_execute, plan_create_and_execute. + arguments: Tool arguments from the LLM. + planning_context: PlanningContext with tool_manager and plan registry. + model_manager: Optional ModelManager for LLM-driven step execution. 
+ ui_manager: Optional UI manager for per-step progress display. + + Returns: + JSON string with the result (for insertion into conversation history). + """ + if tool_name == "plan_create": + return await _handle_plan_create(arguments, planning_context) + + if tool_name == "plan_execute": + return await _handle_plan_execute( + arguments, planning_context, model_manager, ui_manager + ) + + if tool_name == "plan_create_and_execute": + return await _handle_plan_create_and_execute( + arguments, planning_context, model_manager, ui_manager + ) + + return json.dumps({"error": f"Unknown plan tool: {tool_name}"}) + + +async def _handle_plan_create( + arguments: dict[str, Any], + context: PlanningContext, +) -> str: + """Generate a plan from a goal description.""" + goal = arguments.get("goal", "") + if not goal: + return json.dumps({"error": "Goal description is required."}) + + try: + plan_dict = await _generate_plan(context, goal) + if not plan_dict or not plan_dict.get("steps"): + return json.dumps({"error": "Failed to generate a valid plan."}) + + plan_id = await context.save_plan_from_dict(plan_dict) + + return json.dumps( + { + "success": True, + "plan_id": plan_id, + "title": plan_dict.get("title", "Untitled"), + "steps": [ + { + "index": s.get("index", i + 1), + "title": s.get("title", ""), + "tool": s.get("tool", ""), + } + for i, s in enumerate(plan_dict.get("steps", [])) + ], + } + ) + + except Exception as e: + logger.error("Plan creation failed: %s", e) + return json.dumps({"error": f"Plan creation failed: {e}"}) + + +async def _handle_plan_execute( + arguments: dict[str, Any], + context: PlanningContext, + model_manager: Any | None = None, + ui_manager: Any | None = None, +) -> str: + """Execute a previously created plan.""" + plan_id = arguments.get("plan_id", "") + if not plan_id: + return json.dumps({"error": "plan_id is required."}) + + plan_data = await context.get_plan(plan_id) + if not plan_data: + return json.dumps({"error": f"Plan not found: 
{plan_id}"}) + + return await _run_plan(context, plan_data, model_manager, ui_manager) + + +async def _handle_plan_create_and_execute( + arguments: dict[str, Any], + context: PlanningContext, + model_manager: Any | None = None, + ui_manager: Any | None = None, +) -> str: + """Generate a plan and execute it immediately.""" + goal = arguments.get("goal", "") + if not goal: + return json.dumps({"error": "Goal description is required."}) + + # Show plan generation phase + if ui_manager: + try: + await ui_manager.start_tool_execution( + "plan_create_and_execute", {"phase": "generating plan..."} + ) + except Exception: + pass + + try: + plan_dict = await _generate_plan(context, goal) + + # Finish the generation spinner + if ui_manager: + try: + steps = plan_dict.get("steps", []) if plan_dict else [] + title = plan_dict.get("title", "Untitled") if plan_dict else "?" + await ui_manager.finish_tool_execution( + result=f"Plan generated: {title} ({len(steps)} steps)", + success=bool(plan_dict and steps), + ) + except Exception: + pass + + if not plan_dict or not plan_dict.get("steps"): + return json.dumps({"error": "Failed to generate a valid plan."}) + + # Save so it can be resumed if interrupted + plan_id = await context.save_plan_from_dict(plan_dict) + plan_data = await context.get_plan(plan_id) + if not plan_data: + return json.dumps({"error": "Failed to load saved plan."}) + + return await _run_plan(context, plan_data, model_manager, ui_manager) + + except Exception as e: + # Make sure spinner is stopped on error + if ui_manager: + try: + await ui_manager.finish_tool_execution(result=str(e), success=False) + except Exception: + pass + logger.error("Plan create-and-execute failed: %s", e) + return json.dumps({"error": f"Plan create-and-execute failed: {e}"}) + + +async def _generate_plan( + context: PlanningContext, + goal: str, +) -> dict[str, Any] | None: + """Generate a plan dict from a goal description using PlanAgent.""" + from chuk_ai_planner.agents.plan_agent 
import PlanAgent + + tool_catalog = await context.get_tool_catalog() + tool_names = [ + t.get("function", {}).get("name", "") + for t in tool_catalog + if t.get("function", {}).get("name") + ] + if not tool_names: + return None + + system_prompt = _build_plan_system_prompt(tool_catalog) + + agent = PlanAgent( + system_prompt=system_prompt, + validate_step=lambda step: _validate_step(step, tool_names), + max_retries=2, + ) + + result: dict[str, Any] | None = await agent.plan(goal) + return result + + +async def _run_plan( + context: PlanningContext, + plan_data: dict[str, Any], + model_manager: Any | None = None, + ui_manager: Any | None = None, +) -> str: + """Execute a plan and return JSON results.""" + from mcp_cli.planning.executor import PlanRunner + + # Build per-step callbacks that drive the UI manager + async def on_tool_start(tool_name: str, arguments: dict) -> None: + if ui_manager: + try: + await ui_manager.start_tool_execution(tool_name, arguments) + except Exception: + pass + + async def on_tool_complete( + tool_name: str, result_text: str, success: bool, elapsed: float + ) -> None: + if ui_manager: + try: + await ui_manager.finish_tool_execution( + result=result_text, success=success + ) + except Exception: + pass + + runner = PlanRunner( + context, + model_manager=model_manager, + enable_guards=False, + on_tool_start=on_tool_start, + on_tool_complete=on_tool_complete, + ) + + result = await runner.execute_plan(plan_data, checkpoint=False) + + response: dict[str, Any] = { + "success": result.success, + "plan_id": result.plan_id, + "title": result.plan_title, + "duration": round(result.total_duration, 2), + "steps_completed": len([s for s in result.steps if s.success]), + "steps_total": len(result.steps), + } + + if result.error: + response["error"] = result.error + + # Include variable results (the useful output) + if result.variables: + response["results"] = result.variables + + # Include per-step summaries + response["steps"] = [ + { + "index": 
s.step_index, + "title": s.step_title, + "tool": s.tool_name, + "success": s.success, + "error": s.error, + } + for s in result.steps + ] + + return json.dumps(response, default=str) + + +def _build_plan_system_prompt(tool_catalog: list[dict[str, Any]]) -> str: + """Build the system prompt for LLM plan generation. + + Reuses the same logic as plan.py's _build_plan_system_prompt. + """ + tool_lines = [] + for tool in tool_catalog: + func = tool.get("function", {}) + name = func.get("name", "?") + desc = func.get("description", "") + params = func.get("parameters", {}) + props = params.get("properties", {}) + required = params.get("required", []) + + param_parts = [] + for pname, pinfo in props.items(): + ptype = pinfo.get("type", "any") + pdesc = pinfo.get("description", "") + req = " (required)" if pname in required else "" + param_parts.append(f" {pname}: {ptype}{req} — {pdesc}") + + params_str = "\n".join(param_parts) if param_parts else " (no parameters)" + tool_lines.append(f" {name}: {desc}\n Parameters:\n{params_str}") + + tools_text = "\n\n".join(tool_lines) + + return f"""You are a planning assistant. Given a task description, create a structured execution plan. 
+
+Available tools (with parameter schemas):
+
+{tools_text}
+
+Output a JSON object with this exact structure:
+{{
+  "title": "Short plan title",
+  "steps": [
+    {{
+      "title": "What this step does",
+      "tool": "tool_name",
+      "args": {{"arg1": "value1"}},
+      "depends_on": [],
+      "result_variable": "optional_var_name"
+    }}
+  ]
+}}
+
+Rules:
+- Only use tools from the available tools list above
+- Use the EXACT parameter names shown in the tool schemas
+- depends_on is a list of step indices (1-based, matching the step order above) that must complete first
+- result_variable stores the output for use in later steps as ${{var_name}}
+- Keep plans focused — prefer fewer, targeted steps over many small ones
+- Each step should have exactly one tool call"""
+
+
+def _validate_step(step: dict[str, Any], tool_names: list[str]) -> tuple[bool, str]:
+    """Validate a plan step against available tools."""
+    tool = step.get("tool", "")
+    if tool not in tool_names:
+        return False, f"Unknown tool: {tool}. Available: {', '.join(tool_names[:10])}"
+    if not step.get("title"):
+        return False, "Step must have a title"
+    return True, ""
diff --git a/src/mcp_cli/tools/manager.py b/src/mcp_cli/tools/manager.py
index 11447a68..94867364 100644
--- a/src/mcp_cli/tools/manager.py
+++ b/src/mcp_cli/tools/manager.py
@@ -472,7 +472,10 @@ async def get_tool_by_name(
         if self._tool_index is None:
             await self._build_tool_index()
 
-        assert self._tool_index is not None  # for type checker
+        if (
+            self._tool_index is None
+        ):  # pragma: no cover — unreachable after _build_tool_index
+            return None
 
         if namespace:
             # Prefer fully-qualified lookup
diff --git a/tests/commands/models/test_server_models.py b/tests/commands/models/test_server_models.py
index d42b50e8..12633fb6 100644
--- a/tests/commands/models/test_server_models.py
+++ b/tests/commands/models/test_server_models.py
@@ -195,3 +195,19 @@ def test_to_display_string_with_experimental(self):
         assert "Tools" in display
         assert "Events*" in display
         assert "Streaming*" in display
+
+
+class
TestServerActionParamsValidatorBranch: + """Cover the validator raise path (line 46).""" + + def test_output_format_validator_rejects_xml(self): + with pytest.raises(ValidationError): + ServerActionParams(output_format="xml") + + +class TestServerPerformanceInfoValidatorBranch: + """Cover the validator raise path (line 90).""" + + def test_ping_ms_validator_rejects_negative(self): + with pytest.raises(ValidationError): + ServerPerformanceInfo(icon="X", latency="bad", ping_ms=-1.0) diff --git a/tests/commands/plan/__init__.py b/tests/commands/plan/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/commands/plan/test_plan_command.py b/tests/commands/plan/test_plan_command.py new file mode 100644 index 00000000..def3d0ab --- /dev/null +++ b/tests/commands/plan/test_plan_command.py @@ -0,0 +1,1204 @@ +# tests/commands/plan/test_plan_command.py +"""Tests for PlanCommand — plan CRUD via the unified command interface.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from mcp_cli.commands.base import CommandMode +from mcp_cli.commands.plan.plan import PlanCommand, _planning_context_cache + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +@dataclass +class FakeToolInfo: + name: str + + +class FakeToolManager: + """Minimal ToolManager stub for PlanCommand tests.""" + + def __init__(self, tool_names: list[str] | None = None): + self._tool_names = tool_names or ["read_file", "write_file", "search_code"] + + async def get_all_tools(self) -> list[FakeToolInfo]: + return [FakeToolInfo(name=n) for n in self._tool_names] + + async def get_adapted_tools_for_llm(self, provider: str) -> list[dict[str, Any]]: + return [ + {"type": "function", "function": {"name": n, "description": f"Tool: {n}"}} + for n in 
self._tool_names
+        ]
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def cmd():
+    return PlanCommand()
+
+
+@pytest.fixture
+def tool_manager():
+    return FakeToolManager()
+
+
+@pytest.fixture(autouse=True)
+def _clear_context_cache():
+    """Clear the module-level planning context cache between tests."""
+    _planning_context_cache.clear()
+    yield
+    _planning_context_cache.clear()
+
+
+# ---------------------------------------------------------------------------
+# Properties
+# ---------------------------------------------------------------------------
+
+
+class TestPlanCommandProperties:
+    def test_name(self, cmd):
+        assert cmd.name == "plan"
+
+    def test_aliases(self, cmd):
+        assert "plans" in cmd.aliases
+
+    def test_description(self, cmd):
+        assert "plan" in cmd.description.lower()
+
+    def test_help_text(self, cmd):
+        text = cmd.help_text
+        assert "/plan" in text
+        assert "create" in text
+        assert "list" in text
+        assert "show" in text
+        assert "run" in text
+        assert "delete" in text
+        assert "resume" in text
+
+    def test_parameters(self, cmd):
+        names = {p.name for p in cmd.parameters}
+        assert "action" in names
+        assert "plan_id_or_description" in names
+
+    def test_modes(self, cmd):
+        assert cmd.modes == CommandMode.ALL
+
+
+# ---------------------------------------------------------------------------
+# execute() — no tool_manager
+# ---------------------------------------------------------------------------
+
+
+class TestPlanCommandNoToolManager:
+    @pytest.mark.asyncio
+    async def test_no_tool_manager_returns_error(self, cmd):
+        result = await cmd.execute(args="list")
+        assert result.success is False
+        assert "Tool manager not available" in result.error
+
+
+# ---------------------------------------------------------------------------
+# LIST action
+# ---------------------------------------------------------------------------
+
+
+class TestPlanListAction:
+    @pytest.mark.asyncio
+    async def test_list_empty(self, cmd, tool_manager):
+        with patch(
+            "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context"
+        ) as mock_ctx:
+            ctx = AsyncMock()
+            ctx.list_plans = AsyncMock(return_value=[])
+            mock_ctx.return_value = ctx
+            with patch("mcp_cli.commands.plan.plan.output", create=True):
+                result = await cmd.execute(args="list", tool_manager=tool_manager)
+        assert result.success is True
+        assert "No saved plans" in result.output
+
+    @pytest.mark.asyncio
+    async def test_list_with_plans(self, cmd, tool_manager):
+        plans = [
+            {
+                "id": "abc12345-full-uuid",
+                "title": "Test Plan",
+                "steps": [{"title": "s1"}],
+            },
+        ]
+        with patch(
+            "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context"
+        ) as mock_ctx:
+            ctx = AsyncMock()
+            ctx.list_plans = AsyncMock(return_value=plans)
+            mock_ctx.return_value = ctx
+            with patch("mcp_cli.commands.plan.plan.output", create=True):
+                with patch(
+                    "mcp_cli.commands.plan.plan.format_table",
+                    create=True,
+                    return_value=MagicMock(),
+                ):
+                    result = await cmd.execute(args="list", tool_manager=tool_manager)
+        assert result.success is True
+        assert result.data is not None
+        assert len(result.data) == 1
+
+    @pytest.mark.asyncio
+    async def test_default_action_is_list(self, cmd, tool_manager):
+        """No args defaults to list."""
+        with patch(
+            "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context"
+        ) as mock_ctx:
+            ctx = AsyncMock()
+            ctx.list_plans = AsyncMock(return_value=[])
+            mock_ctx.return_value = ctx
+            with patch("mcp_cli.commands.plan.plan.output", create=True):
+                result = await cmd.execute(args="", tool_manager=tool_manager)
+        assert result.success is True
+
+
+# 
--------------------------------------------------------------------------- +# SHOW action +# --------------------------------------------------------------------------- + + +class TestPlanShowAction: + @pytest.mark.asyncio + async def test_show_no_id(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + mock_ctx.return_value = AsyncMock() + result = await cmd.execute(args="show", tool_manager=tool_manager) + assert result.success is False + assert "Plan ID required" in result.error + + @pytest.mark.asyncio + async def test_show_not_found(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=None) + mock_ctx.return_value = ctx + result = await cmd.execute( + args="show nonexistent", tool_manager=tool_manager + ) + assert result.success is False + assert "Plan not found" in result.error + + @pytest.mark.asyncio + async def test_show_found(self, cmd, tool_manager): + plan_data = { + "title": "My Plan", + "steps": [{"title": "s1", "tool": "read_file"}], + } + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_ctx.return_value = ctx + with patch("mcp_cli.commands.plan.plan._display_plan"): + result = await cmd.execute( + args="show abc123", tool_manager=tool_manager + ) + assert result.success is True + assert result.data == plan_data + + +# --------------------------------------------------------------------------- +# DELETE action +# --------------------------------------------------------------------------- + + +class TestPlanDeleteAction: + @pytest.mark.asyncio + async def test_delete_no_id(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + mock_ctx.return_value = AsyncMock() + 
result = await cmd.execute(args="delete", tool_manager=tool_manager) + assert result.success is False + assert "Plan ID required" in result.error + + @pytest.mark.asyncio + async def test_delete_found(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + ctx = AsyncMock() + ctx.delete_plan = AsyncMock(return_value=True) + mock_ctx.return_value = ctx + with patch("chuk_term.ui.output") as mock_out: + result = await cmd.execute( + args="delete abc123", tool_manager=tool_manager + ) + assert result.success is True + mock_out.success.assert_called_once() + + @pytest.mark.asyncio + async def test_delete_not_found(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + ctx = AsyncMock() + ctx.delete_plan = AsyncMock(return_value=False) + mock_ctx.return_value = ctx + result = await cmd.execute(args="delete abc123", tool_manager=tool_manager) + assert result.success is False + assert "Plan not found" in result.error + + +# --------------------------------------------------------------------------- +# RUN action +# --------------------------------------------------------------------------- + + +class TestPlanRunAction: + @pytest.mark.asyncio + async def test_run_no_id(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + mock_ctx.return_value = AsyncMock() + result = await cmd.execute(args="run", tool_manager=tool_manager) + assert result.success is False + assert "Plan ID required" in result.error + + @pytest.mark.asyncio + async def test_run_not_found(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=None) + mock_ctx.return_value = ctx + result = await cmd.execute(args="run abc123", tool_manager=tool_manager) + assert result.success is 
False + assert "Plan not found" in result.error + + @pytest.mark.asyncio + async def test_run_dry_run_flag(self, cmd, tool_manager): + """--dry-run flag should be detected.""" + plan_data = {"title": "My Plan", "steps": []} + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_ctx.return_value = ctx + + mock_result = MagicMock() + mock_result.success = True + mock_result.steps = [] + mock_result.total_duration = 0.1 + + with patch("mcp_cli.planning.executor.PlanRunner") as mock_runner_cls: + mock_runner = AsyncMock() + mock_runner.execute_plan = AsyncMock(return_value=mock_result) + mock_runner_cls.return_value = mock_runner + with patch("chuk_term.ui.output"): + result = await cmd.execute( + args="run abc123 --dry-run", tool_manager=tool_manager + ) + + assert result.success is True + # Verify dry_run=True was passed + call_kwargs = mock_runner.execute_plan.call_args + assert call_kwargs[1].get("dry_run") is True + + +# --------------------------------------------------------------------------- +# RESUME action +# --------------------------------------------------------------------------- + + +class TestPlanResumeAction: + @pytest.mark.asyncio + async def test_resume_no_id(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + mock_ctx.return_value = AsyncMock() + result = await cmd.execute(args="resume", tool_manager=tool_manager) + assert result.success is False + assert "Plan ID required" in result.error + + @pytest.mark.asyncio + async def test_resume_plan_not_found(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=None) + mock_ctx.return_value = ctx + result = await cmd.execute(args="resume abc123", tool_manager=tool_manager) + assert 
result.success is False + assert "Plan not found" in result.error + + @pytest.mark.asyncio + async def test_resume_no_checkpoint(self, cmd, tool_manager): + plan_data = {"title": "My Plan", "steps": []} + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_ctx.return_value = ctx + with patch("mcp_cli.planning.executor.PlanRunner") as mock_runner_cls: + mock_runner = MagicMock() + mock_runner.load_checkpoint.return_value = None + mock_runner_cls.return_value = mock_runner + result = await cmd.execute( + args="resume abc123", tool_manager=tool_manager + ) + assert result.success is False + assert "No checkpoint found" in result.error + + +# --------------------------------------------------------------------------- +# CREATE action +# --------------------------------------------------------------------------- + + +class TestPlanCreateAction: + @pytest.mark.asyncio + async def test_create_no_description(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + mock_ctx.return_value = AsyncMock() + result = await cmd.execute(args="create", tool_manager=tool_manager) + assert result.success is False + assert "Description required" in result.error + + @pytest.mark.asyncio + async def test_create_no_tools(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + ctx = AsyncMock() + ctx.get_tool_names = AsyncMock(return_value=[]) + mock_ctx.return_value = ctx + with patch("mcp_cli.commands.plan.plan.output", create=True): + result = await cmd.execute( + args="create do something", tool_manager=tool_manager + ) + assert result.success is False + assert "No tools available" in result.error + + +# --------------------------------------------------------------------------- +# Unknown action +# 
--------------------------------------------------------------------------- + + +class TestPlanUnknownAction: + @pytest.mark.asyncio + async def test_unknown_action(self, cmd, tool_manager): + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + mock_ctx.return_value = AsyncMock() + result = await cmd.execute(args="bogus", tool_manager=tool_manager) + assert result.success is False + assert "Unknown action" in result.error + + +# --------------------------------------------------------------------------- +# Args parsing — list vs string +# --------------------------------------------------------------------------- + + +class TestPlanArgsParsing: + @pytest.mark.asyncio + async def test_args_as_list(self, cmd, tool_manager): + """Chat adapter passes args as a list.""" + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + ctx = AsyncMock() + ctx.list_plans = AsyncMock(return_value=[]) + mock_ctx.return_value = ctx + with patch("mcp_cli.commands.plan.plan.output", create=True): + result = await cmd.execute(args=["list"], tool_manager=tool_manager) + assert result.success is True + + @pytest.mark.asyncio + async def test_args_as_string(self, cmd, tool_manager): + """Interactive adapter passes args as a string.""" + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_ctx: + ctx = AsyncMock() + ctx.list_plans = AsyncMock(return_value=[]) + mock_ctx.return_value = ctx + with patch("mcp_cli.commands.plan.plan.output", create=True): + result = await cmd.execute(args="list", tool_manager=tool_manager) + assert result.success is True + + +# --------------------------------------------------------------------------- +# _get_planning_context — lines 152-157 +# --------------------------------------------------------------------------- + + +class TestGetPlanningContext: + @pytest.mark.asyncio + async def test_creates_planning_context(self, cmd, 
tool_manager): + """First call creates a new PlanningContext and caches it.""" + with patch("mcp_cli.planning.context.PlanningContext") as mock_pc: + mock_instance = MagicMock() + mock_pc.return_value = mock_instance + + result = await cmd._get_planning_context(tool_manager) + + mock_pc.assert_called_once_with(tool_manager) + assert result is mock_instance + + @pytest.mark.asyncio + async def test_caches_planning_context(self, cmd, tool_manager): + """Second call returns cached context, does not re-create.""" + with patch("mcp_cli.planning.context.PlanningContext") as mock_pc: + mock_instance = MagicMock() + mock_pc.return_value = mock_instance + + r1 = await cmd._get_planning_context(tool_manager) + r2 = await cmd._get_planning_context(tool_manager) + + mock_pc.assert_called_once() # Only created once + assert r1 is r2 + + +# --------------------------------------------------------------------------- +# _create_plan success path — lines 208-243 +# --------------------------------------------------------------------------- + + +class TestPlanCreateSuccess: + @pytest.mark.asyncio + async def test_create_plan_success(self, cmd, tool_manager): + """Full success path: agent generates plan, context saves it.""" + tool_catalog = [ + { + "type": "function", + "function": { + "name": "read_file", + "description": "Read a file", + "parameters": { + "properties": { + "path": {"type": "string", "description": "File path"} + }, + "required": ["path"], + }, + }, + } + ] + plan_dict = { + "title": "My Plan", + "steps": [ + { + "title": "Read file", + "tool": "read_file", + "args": {"path": "/tmp/test.py"}, + "depends_on": [], + "result_variable": "fc", + } + ], + } + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = AsyncMock() + ctx.get_tool_catalog = AsyncMock(return_value=tool_catalog) + ctx.save_plan_from_dict = AsyncMock(return_value="plan-abc-123") + ctx.get_plan = AsyncMock(return_value=plan_dict) + 
mock_get_ctx.return_value = ctx + + with patch("chuk_ai_planner.agents.plan_agent.PlanAgent") as mock_agent_cls: + mock_agent = AsyncMock() + mock_agent.plan = AsyncMock(return_value=plan_dict) + mock_agent_cls.return_value = mock_agent + + with patch("chuk_term.ui.output"): + with patch("mcp_cli.commands.plan.plan._display_plan"): + result = await cmd.execute( + args="create read the config file", + tool_manager=tool_manager, + ) + + assert result.success is True + assert result.data["plan_id"] == "plan-abc-123" + assert "1 steps" in result.output + ctx.save_plan_from_dict.assert_awaited_once_with(plan_dict) + + @pytest.mark.asyncio + async def test_create_plan_agent_returns_empty(self, cmd, tool_manager): + """Agent returns empty plan → error.""" + tool_catalog = [ + { + "type": "function", + "function": { + "name": "read_file", + "description": "Read a file", + "parameters": {"properties": {}, "required": []}, + }, + } + ] + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = AsyncMock() + ctx.get_tool_catalog = AsyncMock(return_value=tool_catalog) + mock_get_ctx.return_value = ctx + + with patch("chuk_ai_planner.agents.plan_agent.PlanAgent") as mock_agent_cls: + mock_agent = AsyncMock() + mock_agent.plan = AsyncMock(return_value={"steps": []}) + mock_agent_cls.return_value = mock_agent + + with patch("chuk_term.ui.output"): + result = await cmd.execute( + args="create do stuff", tool_manager=tool_manager + ) + + assert result.success is False + assert "Failed to generate" in result.error + + @pytest.mark.asyncio + async def test_create_plan_agent_exception(self, cmd, tool_manager): + """Agent raises exception → error.""" + tool_catalog = [ + { + "type": "function", + "function": { + "name": "read_file", + "description": "Read a file", + "parameters": {"properties": {}, "required": []}, + }, + } + ] + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = 
AsyncMock() + ctx.get_tool_catalog = AsyncMock(return_value=tool_catalog) + mock_get_ctx.return_value = ctx + + with patch("chuk_ai_planner.agents.plan_agent.PlanAgent") as mock_agent_cls: + mock_agent = AsyncMock() + mock_agent.plan = AsyncMock(side_effect=RuntimeError("LLM failed")) + mock_agent_cls.return_value = mock_agent + + with patch("chuk_term.ui.output"): + result = await cmd.execute( + args="create do stuff", tool_manager=tool_manager + ) + + assert result.success is False + assert "Plan creation failed" in result.error + + +# --------------------------------------------------------------------------- +# _run_plan callbacks, success, failure — lines 287-323 +# --------------------------------------------------------------------------- + + +class TestPlanRunCallbacks: + """Test _run_plan with step callbacks and success/failure paths.""" + + def _make_exec_result(self, *, success=True, error=None): + """Create a mock PlanExecutionResult.""" + r = MagicMock() + r.success = success + r.steps = [MagicMock()] + r.total_duration = 1.5 + r.variables = {"v1": "val"} + r.error = error + return r + + @pytest.mark.asyncio + async def test_run_success_with_callbacks(self, cmd, tool_manager): + """Success path: result.success=True, output.success is called.""" + plan_data = { + "title": "My Plan", + "steps": [{"title": "s1", "tool": "read_file", "depends_on": []}], + } + exec_result = self._make_exec_result(success=True) + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_get_ctx.return_value = ctx + + with patch("mcp_cli.planning.executor.PlanRunner") as mock_runner_cls: + mock_runner = AsyncMock() + mock_runner.execute_plan = AsyncMock(return_value=exec_result) + mock_runner_cls.return_value = mock_runner + + with patch("chuk_term.ui.output") as mock_output: + result = await cmd.execute( + args="run abc123", tool_manager=tool_manager + ) + 
+ assert result.success is True + assert "completed" in result.output + mock_output.success.assert_called_once() + + @pytest.mark.asyncio + async def test_run_failure(self, cmd, tool_manager): + """Failure path: result.success=False, output.error is called.""" + plan_data = { + "title": "My Plan", + "steps": [{"title": "s1", "tool": "read_file", "depends_on": []}], + } + exec_result = self._make_exec_result(success=False, error="Step 1 timed out") + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_get_ctx.return_value = ctx + + with patch("mcp_cli.planning.executor.PlanRunner") as mock_runner_cls: + mock_runner = AsyncMock() + mock_runner.execute_plan = AsyncMock(return_value=exec_result) + mock_runner_cls.return_value = mock_runner + + with patch("chuk_term.ui.output") as mock_output: + result = await cmd.execute( + args="run abc123", tool_manager=tool_manager + ) + + assert result.success is False + assert "failed" in result.output + mock_output.error.assert_called_once() + assert "Step 1 timed out" in mock_output.error.call_args[0][0] + + @pytest.mark.asyncio + async def test_run_step_callbacks_invoked(self, cmd, tool_manager): + """Verify on_step_start / on_step_complete callbacks are wired.""" + plan_data = {"title": "P", "steps": [{"title": "s1"}]} + exec_result = self._make_exec_result(success=True) + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_get_ctx.return_value = ctx + + with patch("mcp_cli.planning.executor.PlanRunner") as mock_runner_cls: + mock_runner = AsyncMock() + mock_runner.execute_plan = AsyncMock(return_value=exec_result) + mock_runner_cls.return_value = mock_runner + + with patch("chuk_term.ui.output"): + await cmd.execute(args="run abc123", tool_manager=tool_manager) + + # Check 
PlanRunner was constructed with callbacks + init_kwargs = mock_runner_cls.call_args[1] + assert callable(init_kwargs["on_step_start"]) + assert callable(init_kwargs["on_step_complete"]) + assert callable(init_kwargs["on_tool_start"]) + assert callable(init_kwargs["on_tool_complete"]) + + @pytest.mark.asyncio + async def test_on_step_start_callback(self, cmd, tool_manager): + """Exercise the on_step_start callback.""" + plan_data = {"title": "P", "steps": []} + exec_result = self._make_exec_result(success=True) + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_get_ctx.return_value = ctx + + captured_callbacks = {} + + with patch("mcp_cli.planning.executor.PlanRunner") as mock_runner_cls: + mock_runner = AsyncMock() + mock_runner.execute_plan = AsyncMock(return_value=exec_result) + + def capture_init(*args, **kwargs): + captured_callbacks.update(kwargs) + return mock_runner + + mock_runner_cls.side_effect = capture_init + + with patch("chuk_term.ui.output") as mock_output: + await cmd.execute(args="run abc123", tool_manager=tool_manager) + + # Call the captured callback + captured_callbacks["on_step_start"]("1", "Read file", "read_file") + mock_output.info.assert_any_call(" Step 1: Read file") + + @pytest.mark.asyncio + async def test_on_step_complete_failure_callback(self, cmd, tool_manager): + """Exercise on_step_complete callback with a failed step.""" + plan_data = {"title": "P", "steps": []} + exec_result = self._make_exec_result(success=True) + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_get_ctx.return_value = ctx + + captured_callbacks = {} + + with patch("mcp_cli.planning.executor.PlanRunner") as mock_runner_cls: + mock_runner = AsyncMock() + mock_runner.execute_plan = 
AsyncMock(return_value=exec_result) + + def capture_init(*args, **kwargs): + captured_callbacks.update(kwargs) + return mock_runner + + mock_runner_cls.side_effect = capture_init + + with patch("chuk_term.ui.output") as mock_output: + await cmd.execute(args="run abc123", tool_manager=tool_manager) + + # Create a failed step result + step_result = MagicMock() + step_result.success = False + step_result.step_index = "2" + step_result.error = "timeout" + captured_callbacks["on_step_complete"](step_result) + mock_output.error.assert_any_call(" Step 2 failed: timeout") + + @pytest.mark.asyncio + async def test_on_tool_start_with_display(self, cmd, tool_manager): + """Exercise on_tool_start callback with a display manager.""" + plan_data = {"title": "P", "steps": []} + exec_result = self._make_exec_result(success=True) + + mock_display = AsyncMock() + mock_ui_manager = MagicMock() + mock_ui_manager.display = mock_display + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_get_ctx.return_value = ctx + + captured_callbacks = {} + + with patch("mcp_cli.planning.executor.PlanRunner") as mock_runner_cls: + mock_runner = AsyncMock() + mock_runner.execute_plan = AsyncMock(return_value=exec_result) + + def capture_init(*args, **kwargs): + captured_callbacks.update(kwargs) + return mock_runner + + mock_runner_cls.side_effect = capture_init + + with patch("chuk_term.ui.output"): + await cmd.execute( + args="run abc123", + tool_manager=tool_manager, + ui_manager=mock_ui_manager, + ) + + # Call the async callback + await captured_callbacks["on_tool_start"]( + "read_file", {"path": "/tmp"} + ) + mock_display.start_tool_execution.assert_awaited_once() + + @pytest.mark.asyncio + async def test_on_tool_complete_with_display(self, cmd, tool_manager): + """Exercise on_tool_complete callback with a display manager.""" + plan_data = {"title": "P", "steps": []} + 
exec_result = self._make_exec_result(success=True) + + mock_display = AsyncMock() + mock_ui_manager = MagicMock() + mock_ui_manager.display = mock_display + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_get_ctx.return_value = ctx + + captured_callbacks = {} + + with patch("mcp_cli.planning.executor.PlanRunner") as mock_runner_cls: + mock_runner = AsyncMock() + mock_runner.execute_plan = AsyncMock(return_value=exec_result) + + def capture_init(*args, **kwargs): + captured_callbacks.update(kwargs) + return mock_runner + + mock_runner_cls.side_effect = capture_init + + with patch("chuk_term.ui.output"): + await cmd.execute( + args="run abc123", + tool_manager=tool_manager, + ui_manager=mock_ui_manager, + ) + + await captured_callbacks["on_tool_complete"]( + "read_file", "ok", True, 0.5 + ) + mock_display.stop_tool_execution.assert_awaited_once_with( + "ok", True + ) + + +# --------------------------------------------------------------------------- +# _resume_plan with checkpoint — lines 371-394 +# --------------------------------------------------------------------------- + + +class TestPlanResumeWithCheckpoint: + @pytest.mark.asyncio + async def test_resume_success(self, cmd, tool_manager): + """Resume with checkpoint found, execution succeeds.""" + plan_data = { + "title": "My Plan", + "steps": [ + {"index": "1", "title": "s1", "tool": "a", "depends_on": []}, + {"index": "2", "title": "s2", "tool": "b", "depends_on": ["1"]}, + ], + } + checkpoint = { + "completed_steps": ["1"], + "variables": {"v1": "data"}, + } + exec_result = MagicMock() + exec_result.success = True + exec_result.error = None + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_get_ctx.return_value = ctx + + with 
patch("mcp_cli.planning.executor.PlanRunner") as mock_runner_cls: + mock_runner = MagicMock() + mock_runner.load_checkpoint = MagicMock(return_value=checkpoint) + mock_runner.execute_plan = AsyncMock(return_value=exec_result) + mock_runner_cls.return_value = mock_runner + + with patch("chuk_term.ui.output") as mock_output: + result = await cmd.execute( + args="resume abc123", tool_manager=tool_manager + ) + + assert result.success is True + mock_output.success.assert_called_once() + # Verify execute_plan was called with remaining steps only + call_args = mock_runner.execute_plan.call_args + plan_arg = call_args[0][0] + assert len(plan_arg["steps"]) == 1 # Only step 2 remains + assert call_args[1]["variables"] == {"v1": "data"} + + @pytest.mark.asyncio + async def test_resume_failure(self, cmd, tool_manager): + """Resume with checkpoint found, execution fails.""" + plan_data = { + "title": "Fail Plan", + "steps": [ + {"index": "1", "title": "s1", "tool": "a", "depends_on": []}, + {"index": "2", "title": "s2", "tool": "b", "depends_on": ["1"]}, + ], + } + checkpoint = { + "completed_steps": ["1"], + "variables": {}, + } + exec_result = MagicMock() + exec_result.success = False + exec_result.error = "step 2 blew up" + + with patch( + "mcp_cli.commands.plan.plan.PlanCommand._get_planning_context" + ) as mock_get_ctx: + ctx = AsyncMock() + ctx.get_plan = AsyncMock(return_value=plan_data) + mock_get_ctx.return_value = ctx + + with patch("mcp_cli.planning.executor.PlanRunner") as mock_runner_cls: + mock_runner = MagicMock() + mock_runner.load_checkpoint = MagicMock(return_value=checkpoint) + mock_runner.execute_plan = AsyncMock(return_value=exec_result) + mock_runner_cls.return_value = mock_runner + + with patch("chuk_term.ui.output") as mock_output: + result = await cmd.execute( + args="resume abc123", tool_manager=tool_manager + ) + + assert result.success is False + mock_output.error.assert_called_once() + assert "step 2 blew up" in mock_output.error.call_args[0][0] + 
+
+# ---------------------------------------------------------------------------
+# _build_plan_system_prompt — lines 405-426
+# ---------------------------------------------------------------------------
+
+
+class TestBuildPlanSystemPrompt:
+    def test_basic_prompt(self):
+        from mcp_cli.commands.plan.plan import _build_plan_system_prompt
+
+        catalog = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "read_file",
+                    "description": "Read a file from disk",
+                    "parameters": {
+                        "properties": {
+                            "path": {
+                                "type": "string",
+                                "description": "File path to read",
+                            }
+                        },
+                        "required": ["path"],
+                    },
+                },
+            }
+        ]
+        prompt = _build_plan_system_prompt(catalog)
+
+        assert "read_file" in prompt
+        assert "Read a file from disk" in prompt
+        assert "path: string (required)" in prompt
+        assert "File path to read" in prompt
+        assert "planning assistant" in prompt
+        assert "JSON object" in prompt
+
+    def test_tool_without_params(self):
+        from mcp_cli.commands.plan.plan import _build_plan_system_prompt
+
+        catalog = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "no_args_tool",
+                    "description": "Takes nothing",
+                    "parameters": {"properties": {}, "required": []},
+                },
+            }
+        ]
+        prompt = _build_plan_system_prompt(catalog)
+
+        assert "no_args_tool" in prompt
+        assert "(no parameters)" in prompt
+
+    def test_multiple_tools(self):
+        from mcp_cli.commands.plan.plan import _build_plan_system_prompt
+
+        catalog = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "tool_a",
+                    "description": "Tool A",
+                    "parameters": {"properties": {}, "required": []},
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "tool_b",
+                    "description": "Tool B",
+                    "parameters": {
+                        "properties": {
+                            "x": {"type": "integer", "description": "A number"},
+                        },
+                        "required": [],
+                    },
+                },
+            },
+        ]
+        prompt = _build_plan_system_prompt(catalog)
+
+        assert "tool_a" in prompt
+        assert "tool_b" in prompt
+        assert "x: integer" in prompt
+
+    def test_optional_param(self):
+        from mcp_cli.commands.plan.plan import _build_plan_system_prompt
+
+        catalog = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "my_tool",
+                    "description": "desc",
+                    "parameters": {
+                        "properties": {
+                            "opt": {"type": "string", "description": "optional arg"},
+                        },
+                        "required": [],  # Not required
+                    },
+                },
+            }
+        ]
+        prompt = _build_plan_system_prompt(catalog)
+
+        # Should NOT contain "(required)"
+        assert "opt: string —" in prompt
+        assert "(required)" not in prompt.split("opt")[1].split("\n")[0]
+
+
+# ---------------------------------------------------------------------------
+# _validate_step — lines 457-462
+# ---------------------------------------------------------------------------
+
+
+class TestValidateStep:
+    def test_valid_step(self):
+        from mcp_cli.commands.plan.plan import _validate_step
+
+        step = {"tool": "read_file", "title": "Read it"}
+        ok, msg = _validate_step(step, ["read_file", "write_file"])
+        assert ok is True
+        assert msg == ""
+
+    def test_unknown_tool(self):
+        from mcp_cli.commands.plan.plan import _validate_step
+
+        step = {"tool": "hack_planet", "title": "Hack"}
+        ok, msg = _validate_step(step, ["read_file", "write_file"])
+        assert ok is False
+        assert "Unknown tool" in msg
+        assert "hack_planet" in msg
+
+    def test_missing_title(self):
+        from mcp_cli.commands.plan.plan import _validate_step
+
+        step = {"tool": "read_file", "title": ""}
+        ok, msg = _validate_step(step, ["read_file"])
+        assert ok is False
+        assert "title" in msg.lower()
+
+    def test_missing_tool_key(self):
+        from mcp_cli.commands.plan.plan import _validate_step
+
+        step = {"title": "No tool"}
+        ok, msg = _validate_step(step, ["read_file"])
+        assert ok is False
+        assert "Unknown tool" in msg
+
+
+# ---------------------------------------------------------------------------
+# _display_plan — lines 467-482
+# ---------------------------------------------------------------------------
+
+
+class TestDisplayPlan:
+    def test_display_plan_basic(self):
+        from mcp_cli.commands.plan.plan import _display_plan
+
+        plan_data = {
+            "title": "Test Plan",
+            "steps": [
+                {"title": "Read file", "tool": "read_file", "depends_on": []},
+            ],
+        }
+        with patch("chuk_term.ui.output") as mock_output:
+            with patch(
+                "mcp_cli.planning.executor.render_plan_dag", return_value="DAG-VIZ"
+            ):
+                _display_plan(plan_data)
+
+        # Check title and DAG were printed
+        calls = [str(c) for c in mock_output.info.call_args_list]
+        assert any("Test Plan" in c and "1 steps" in c for c in calls)
+        assert any("DAG-VIZ" in c for c in calls)
+
+    def test_display_plan_with_result_variables(self):
+        from mcp_cli.commands.plan.plan import _display_plan
+
+        plan_data = {
+            "title": "Var Plan",
+            "steps": [
+                {
+                    "title": "Read",
+                    "tool": "read_file",
+                    "depends_on": [],
+                    "result_variable": "content",
+                },
+                {
+                    "title": "Search",
+                    "tool": "search_code",
+                    "depends_on": ["1"],
+                    "result_variable": "results",
+                },
+            ],
+        }
+        with patch("chuk_term.ui.output") as mock_output:
+            with patch("mcp_cli.planning.executor.render_plan_dag", return_value="DAG"):
+                _display_plan(plan_data)
+
+        # Should display variables
+        calls = [str(c) for c in mock_output.info.call_args_list]
+        assert any("content" in c and "results" in c for c in calls)
+
+    def test_display_plan_no_result_variables(self):
+        from mcp_cli.commands.plan.plan import _display_plan
+
+        plan_data = {
+            "title": "No Vars",
+            "steps": [
+                {"title": "s1", "tool": "t1", "depends_on": []},
+            ],
+        }
+        with patch("chuk_term.ui.output") as mock_output:
+            with patch("mcp_cli.planning.executor.render_plan_dag", return_value="DAG"):
+                _display_plan(plan_data)
+
+        # "Variables:" should NOT appear (no result_variable keys)
+        calls = " ".join(str(c) for c in mock_output.info.call_args_list)
+        assert "Variables:" not in calls
diff --git a/tests/planning/__init__.py b/tests/planning/__init__.py
new file mode 100644
index 00000000..371aa953
--- /dev/null
+++ b/tests/planning/__init__.py
@@ -0,0 +1 @@
+"""Tests for the planning module."""
diff --git a/tests/planning/test_agentic_loop_integration.py b/tests/planning/test_agentic_loop_integration.py
new file mode 100644
index 00000000..7241479f
--- /dev/null
+++ b/tests/planning/test_agentic_loop_integration.py
@@ -0,0 +1,441 @@
+# tests/planning/test_agentic_loop_integration.py
+"""Integration test proving the agentic loop works end-to-end.
+
+Simulates a real plan execution where:
+1. Geocode returns empty results → LLM retries with simpler name
+2. Weather tool gets wrong types → LLM corrects and retries
+3. Both steps succeed through the agentic loop
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+from dataclasses import dataclass
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from mcp_cli.planning.executor import PlanRunner, ModelManagerProtocol
+from mcp_cli.planning.context import PlanningContext
+
+
+# ── Fake MCP Tool Backend ──────────────────────────────────────────────────
+
+
+@dataclass
+class FakeToolCallResult:
+    tool_name: str
+    success: bool = True
+    result: Any = None
+    error: str | None = None
+
+
+class SimulatedToolManager:
+    """Simulates real MCP tool behavior including validation errors."""
+
+    def __init__(self):
+        self.calls: list[tuple[str, dict]] = []
+
+    def get_all_tools(self):
+        return []
+
+    async def get_adapted_tools_for_llm(self, provider: str = "openai"):
+        """Return tool catalog matching real MCP servers."""
+        return [
+            {
+                "type": "function",
+                "function": {
+                    "name": "geocode_location",
+                    "description": "Geocode a location name to coordinates",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "name": {
+                                "type": "string",
+                                "description": "Location name to geocode",
+                            }
+                        },
+                        "required": ["name"],
+                    },
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_weather_forecast",
+                    "description": "Get weather forecast for coordinates",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "latitude": {
+                                "type": "number",
+                                "description": "Latitude",
+                            },
+                            "longitude": {
+                                "type": "number",
+                                "description": "Longitude",
+                            },
+                        },
+                        "required": ["latitude", "longitude"],
+                    },
+                },
+            },
+        ]
+
+    async def execute_tool(
+        self,
+        tool_name: str,
+        arguments: dict[str, Any],
+        namespace: str | None = None,
+        timeout: float | None = None,
+    ) -> FakeToolCallResult:
+        self.calls.append((tool_name, arguments))
+
+        if tool_name == "geocode_location":
+            name = arguments.get("name", "")
+            # "Leavenheath, Suffolk" returns null (too specific)
+            if "suffolk" in name.lower() or "," in name:
+                return FakeToolCallResult(
+                    tool_name=tool_name,
+                    result={"results": None, "generationtime_ms": 0.9},
+                )
+            # Simpler "Leavenheath" works
+            return FakeToolCallResult(
+                tool_name=tool_name,
+                result={
+                    "results": [
+                        {
+                            "name": "Leavenheath",
+                            "latitude": 52.0,
+                            "longitude": 0.85,
+                            "timezone": "Europe/London",
+                        }
+                    ]
+                },
+            )
+
+        if tool_name == "get_weather_forecast":
+            lat = arguments.get("latitude")
+            lon = arguments.get("longitude")
+            # Simulate MCP server type validation
+            if isinstance(lat, str) or isinstance(lon, str):
+                return FakeToolCallResult(
+                    tool_name=tool_name,
+                    success=False,
+                    error=(
+                        "ParameterValidationError: Invalid parameter "
+                        "'latitude': expected number, got str"
+                    ),
+                )
+            return FakeToolCallResult(
+                tool_name=tool_name,
+                result={
+                    "current_weather": {
+                        "temperature": 12.3,
+                        "windspeed": 15.2,
+                        "weathercode": 3,
+                    }
+                },
+            )
+
+        return FakeToolCallResult(
+            tool_name=tool_name,
+            success=False,
+            error=f"Unknown tool: {tool_name}",
+        )
+
+
+# ── LLM Response Builders ──────────────────────────────────────────────────
+
+
+def _tool_call_response(name: str, args: dict) -> dict:
+    """Build a mock LLM response containing a tool call.
+
+    Uses the chuk_llm native format (top-level tool_calls key),
+    which is what the real client returns with stream=False.
+    """
+    return {
+        "response": None,
+        "tool_calls": [
+            {
+                "id": "call_1",
+                "type": "function",
+                "function": {
+                    "name": name,
+                    "arguments": json.dumps(args),
+                },
+            }
+        ],
+        "usage": {"prompt_tokens": 50, "completion_tokens": 15},
+    }
+
+
+def _text_response(text: str = "Step complete.") -> dict:
+    """Build a mock LLM response with text only (step done).
+
+    Uses the chuk_llm native format (top-level response key).
+    """
+    return {
+        "response": text,
+        "tool_calls": None,
+        "usage": {"prompt_tokens": 100, "completion_tokens": 20},
+    }
+
+
+# ── Integration Tests ──────────────────────────────────────────────────────
+
+
+class TestAgenticLoopIntegration:
+    """Prove the agentic loop handles real-world scenarios."""
+
+    @pytest.mark.asyncio
+    async def test_geocode_retry_on_empty_result(self, tmp_path):
+        """Geocode returns null results → LLM retries with simpler name."""
+        tm = SimulatedToolManager()
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+
+        client = AsyncMock()
+        client.create_completion = AsyncMock(
+            side_effect=[
+                # Turn 1: LLM tries "Leavenheath, Suffolk" (returns null)
+                _tool_call_response(
+                    "geocode_location", {"name": "Leavenheath, Suffolk"}
+                ),
+                # Turn 2: LLM sees null results, tries simpler "Leavenheath"
+                _tool_call_response("geocode_location", {"name": "Leavenheath"}),
+                # Turn 3: LLM sees valid results, signals done
+                _text_response("Geocoded: lat=52.0, lon=0.85"),
+            ]
+        )
+
+        mm = MagicMock(spec=ModelManagerProtocol)
+        mm.get_client.return_value = client
+
+        runner = PlanRunner(context, model_manager=mm, enable_guards=False)
+
+        plan = {
+            "id": "geo-test",
+            "title": "Geocode Test",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Geocode Leavenheath",
+                    "tool": "geocode_location",
+                    "args": {"name": "Leavenheath, Suffolk"},
+                    "result_variable": "geocode_result",
+                },
+            ],
+        }
+
+        result = await runner.execute_plan(plan, checkpoint=False)
+
+        assert result.success, f"Expected success, got error: {result.error}"
+        assert result.steps[0].success
+
+        # Should have the good result (second call)
+        geo = result.variables["geocode_result"]
+        assert geo["results"] is not None
+        assert geo["results"][0]["latitude"] == 52.0
+
+        # Verify two geocode calls were made
+        geocode_calls = [c for c in tm.calls if c[0] == "geocode_location"]
+        assert len(geocode_calls) == 2
+        assert geocode_calls[0][1]["name"] == "Leavenheath, Suffolk"
+        assert geocode_calls[1][1]["name"] == "Leavenheath"
+
+        # Verify LLM saw 3 turns (2 tool calls + 1 text response)
+        assert client.create_completion.call_count == 3
+
+    @pytest.mark.asyncio
+    async def test_weather_retry_on_type_error(self, tmp_path):
+        """Weather tool rejects string types → LLM retries with numbers."""
+        tm = SimulatedToolManager()
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+
+        client = AsyncMock()
+        client.create_completion = AsyncMock(
+            side_effect=[
+                # Turn 1: LLM passes lat/lon as strings (error)
+                _tool_call_response(
+                    "get_weather_forecast",
+                    {"latitude": "52.0", "longitude": "0.85"},
+                ),
+                # Turn 2: LLM sees error, retries with numbers
+                _tool_call_response(
+                    "get_weather_forecast",
+                    {"latitude": 52.0, "longitude": 0.85},
+                ),
+                # Turn 3: LLM sees weather data, signals done
+                _text_response("Current temp: 12.3°C"),
+            ]
+        )
+
+        mm = MagicMock(spec=ModelManagerProtocol)
+        mm.get_client.return_value = client
+
+        runner = PlanRunner(context, model_manager=mm, enable_guards=False)
+
+        plan = {
+            "id": "weather-test",
+            "title": "Weather Test",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Get weather",
+                    "tool": "get_weather_forecast",
+                    "args": {"latitude": 52.0, "longitude": 0.85},
+                    "result_variable": "weather",
+                },
+            ],
+        }
+
+        result = await runner.execute_plan(plan, checkpoint=False)
+
+        assert result.success, f"Expected success, got error: {result.error}"
+        weather = result.variables["weather"]
+        assert weather["current_weather"]["temperature"] == 12.3
+
+        # First call with strings failed, second with numbers succeeded
+        weather_calls = [c for c in tm.calls if c[0] == "get_weather_forecast"]
+        assert len(weather_calls) == 2
+        assert isinstance(weather_calls[0][1]["latitude"], str)  # First: string
+        assert isinstance(weather_calls[1][1]["latitude"], float)  # Second: number
+
+    @pytest.mark.asyncio
+    async def test_full_two_step_plan_with_agentic_retries(self, tmp_path):
+        """Full plan: geocode (retry empty) → weather (retry types)."""
+        tm = SimulatedToolManager()
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+
+        client = AsyncMock()
+        client.create_completion = AsyncMock(
+            side_effect=[
+                # Step 1, Turn 1: geocode with full name (null)
+                _tool_call_response(
+                    "geocode_location", {"name": "Leavenheath, Suffolk"}
+                ),
+                # Step 1, Turn 2: geocode with simpler name (success)
+                _tool_call_response("geocode_location", {"name": "Leavenheath"}),
+                # Step 1, Turn 3: LLM signals done
+                _text_response("Found: lat=52.0, lon=0.85"),
+                # Step 2, Turn 1: weather with numbers (success first try)
+                _tool_call_response(
+                    "get_weather_forecast",
+                    {"latitude": 52.0, "longitude": 0.85},
+                ),
+                # Step 2, Turn 2: LLM sees result, signals done
+                _text_response("Weather: 12.3°C"),
+            ]
+        )
+
+        mm = MagicMock(spec=ModelManagerProtocol)
+        mm.get_client.return_value = client
+
+        step_starts = []
+        step_completions = []
+        tool_starts = []
+        tool_completions = []
+
+        runner = PlanRunner(
+            context,
+            model_manager=mm,
+            on_step_start=lambda i, t, tn: step_starts.append((i, t, tn)),
+            on_step_complete=lambda sr: step_completions.append(
+                (sr.step_index, sr.success)
+            ),
+            on_tool_start=lambda name, args: tool_starts.append((name, args)),
+            on_tool_complete=lambda name, result, ok, elapsed: tool_completions.append(
+                (name, ok)
+            ),
+            enable_guards=False,
+        )
+
+        plan = {
+            "id": "full-test",
+            "title": "Weather for Leavenheath",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Geocode Leavenheath",
+                    "tool": "geocode_location",
+                    "args": {"name": "Leavenheath, Suffolk"},
+                    "depends_on": [],
+                    "result_variable": "geocode_result",
+                },
+                {
+                    "index": "2",
+                    "title": "Get current weather",
+                    "tool": "get_weather_forecast",
+                    "args": {
+                        "latitude": "${geocode_result.results.0.latitude}",
+                        "longitude": "${geocode_result.results.0.longitude}",
+                    },
+                    "depends_on": ["1"],
+                    "result_variable": "weather",
+                },
+            ],
+        }
+
+        result = await runner.execute_plan(plan, checkpoint=False)
+
+        # Both steps should succeed
+        assert result.success, f"Plan failed: {result.error}"
+        assert len(result.steps) == 2
+        assert result.steps[0].success
+        assert result.steps[1].success
+
+        # Variables should contain the results
+        assert result.variables["geocode_result"]["results"][0]["latitude"] == 52.0
+        assert result.variables["weather"]["current_weather"]["temperature"] == 12.3
+
+        # Callbacks should have fired
+        assert len(step_starts) == 2
+        assert len(step_completions) == 2
+        assert step_completions[0] == ("1", True)
+        assert step_completions[1] == ("2", True)
+
+        # Total tool calls: 2 geocode + 1 weather = 3
+        assert len(tm.calls) == 3
+
+        # Tool-level callbacks should have fired for each tool call
+        assert len(tool_starts) == 3
+        assert tool_starts[0][0] == "geocode_location"
+        assert tool_starts[1][0] == "geocode_location"
+        assert tool_starts[2][0] == "get_weather_forecast"
+
+        assert len(tool_completions) == 3
+        assert tool_completions[0] == ("geocode_location", True)
+        assert tool_completions[1] == ("geocode_location", True)
+        assert tool_completions[2] == ("get_weather_forecast", True)
+
+    @pytest.mark.asyncio
+    async def test_static_fallback_without_model_manager(self, tmp_path):
+        """Without model_manager, static args are used (no agentic loop)."""
+        tm = SimulatedToolManager()
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+
+        runner = PlanRunner(context, enable_guards=False)  # No model_manager
+
+        plan = {
+            "id": "static-test",
+            "title": "Static Test",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Geocode",
+                    "tool": "geocode_location",
+                    "args": {"name": "Leavenheath"},
+                    "result_variable": "geo",
+                },
+            ],
+        }
+
+        result = await runner.execute_plan(plan, checkpoint=False)
+
+        # Should succeed (Leavenheath without comma works)
+        assert result.success
+        assert result.variables["geo"]["results"][0]["latitude"] == 52.0
+
+        # Only one tool call (no retry without LLM)
+        assert len(tm.calls) == 1
diff --git a/tests/planning/test_backends.py b/tests/planning/test_backends.py
new file mode 100644
index 00000000..de1c0a94
--- /dev/null
+++ b/tests/planning/test_backends.py
@@ -0,0 +1,727 @@
+# tests/planning/test_backends.py
+"""Tests for McpToolBackend — the bridge from planner to ToolManager with guards."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from chuk_ai_planner.execution.models import ToolExecutionRequest
+from mcp_cli.planning.backends import (
+    McpToolBackend,
+    _extract_result,
+    _extract_content_blocks,
+    _try_parse_json,
+    _is_error_result,
+    _extract_error_message,
+    _check_guards,
+    _record_result,
+)
+
+
+# ── Helpers ────────────────────────────────────────────────────────────────
+
+
+@dataclass
+class FakeToolCallResult:
+    """Mimics mcp_cli.tools.models.ToolCallResult."""
+
+    tool_name: str
+    success: bool
+    result: Any = None
+    error: str | None = None
+
+
+class FakeToolManager:
+    """Minimal ToolManager stub for testing McpToolBackend."""
+
+    def __init__(
+        self,
+        *,
+        result: Any = "ok",
+        success: bool = True,
+        error: str | None = None,
+        raise_exc: Exception | None = None,
+    ):
+        self._result = result
+        self._success = success
+        self._error = error
+        self._raise_exc = raise_exc
+        self.calls: list[tuple[str, dict, str | None]] = []
+
+    async def execute_tool(
+        self,
+        tool_name: str,
+        arguments: dict[str, Any],
+        namespace: str | None = None,
+        timeout: float | None = None,
+    ) -> FakeToolCallResult:
+        self.calls.append((tool_name, arguments, namespace))
+        if self._raise_exc:
+            raise self._raise_exc
+        return FakeToolCallResult(
+            tool_name=tool_name,
+            success=self._success,
+            result=self._result,
+            error=self._error,
+        )
+
+
+# ── Tests: Basic Execution ─────────────────────────────────────────────────
+
+
+class TestMcpToolBackendSuccess:
+    """Test successful tool execution through the backend."""
+
+    @pytest.mark.asyncio
+    async def test_basic_execution(self):
+        """Backend calls ToolManager and returns success."""
+        tm = FakeToolManager(result="hello world")
+        backend = McpToolBackend(tm, enable_guards=False)
+
+        request = ToolExecutionRequest(
+            tool_name="read_file",
+            args={"path": "/tmp/test.txt"},
+            step_id="step-1",
+        )
+        result = await backend.execute_tool(request)
+
+        assert result.success
+        assert result.result == "hello world"
+        assert result.error is None
+        assert result.tool_name == "read_file"
+        assert result.duration > 0
+        assert tm.calls == [("read_file", {"path": "/tmp/test.txt"}, None)]
+
+    @pytest.mark.asyncio
+    async def test_with_namespace(self):
+        """Backend applies namespace prefix to tool name."""
+        tm = FakeToolManager(result="done")
+        backend = McpToolBackend(tm, namespace="filesystem", enable_guards=False)
+
+        request = ToolExecutionRequest(
+            tool_name="read_file",
+            args={"path": "/tmp/x"},
+            step_id="step-2",
+        )
+        result = await backend.execute_tool(request)
+
+        assert result.success
+        assert result.tool_name == "read_file"
+        assert tm.calls[0][0] == "filesystem__read_file"
+        assert tm.calls[0][2] == "filesystem"
+
+    @pytest.mark.asyncio
+    async def test_empty_args(self):
+        """Backend handles empty arguments."""
+        tm = FakeToolManager(result={"count": 5})
+        backend = McpToolBackend(tm, enable_guards=False)
+
+        request = ToolExecutionRequest(
+            tool_name="list_tools",
+            args={},
+            step_id="step-3",
+        )
+        result = await backend.execute_tool(request)
+
+        assert result.success
+        assert result.result == {"count": 5}
+
+
+class TestMcpToolBackendFailure:
+    """Test error handling in the backend."""
+
+    @pytest.mark.asyncio
+    async def test_tool_returns_error(self):
+        """Backend wraps ToolCallResult errors."""
+        tm = FakeToolManager(success=False, error="File not found")
+        backend = McpToolBackend(tm, enable_guards=False)
+
+        request = ToolExecutionRequest(
+            tool_name="read_file",
+            args={"path": "/nonexistent"},
+            step_id="step-4",
+        )
+        result = await backend.execute_tool(request)
+
+        assert not result.success
+        assert result.error == "File not found"
+        assert result.result is None
+
+    @pytest.mark.asyncio
+    async def test_tool_raises_exception(self):
+        """Backend catches exceptions from ToolManager."""
+        tm = FakeToolManager(raise_exc=ConnectionError("server down"))
+        backend = McpToolBackend(tm, enable_guards=False)
+
+        request = ToolExecutionRequest(
+            tool_name="ping",
+            args={},
+            step_id="step-5",
+        )
+        result = await backend.execute_tool(request)
+
+        assert not result.success
+        assert "server down" in result.error
+        assert result.result is None
+        assert result.duration > 0
+
+    @pytest.mark.asyncio
+    async def test_ctp_middleware_error_detected(self):
+        """When ToolManager wraps a CTP ToolExecutionResult error as success=True,
+        the backend should detect and report the error correctly.
+
+        This reproduces the bug where CTP middleware returns
+        ToolExecutionResult(success=False, error="...") but ToolManager wraps it
+        as ToolCallResult(success=True, result=<CTP result>).
+        """
+
+        @dataclass
+        class CTPToolExecResult:
+            success: bool
+            result: Any
+            error: str | None
+            tool_name: str = ""
+            duration_ms: float = 0.0
+
+        # Simulate ToolManager wrapping CTP error as success=True
+        ctp_error = CTPToolExecResult(
+            success=False,
+            result=None,
+            error="JSON-RPC Error: ParameterValidationError: Invalid parameter 'name'",
+            tool_name="geocode_location",
+            duration_ms=50,
+        )
+        tm = FakeToolManager(success=True, result=ctp_error)
+        backend = McpToolBackend(tm, enable_guards=False)
+
+        request = ToolExecutionRequest(
+            tool_name="geocode_location",
+            args={"query": "London"},
+            step_id="step-1",
+        )
+        result = await backend.execute_tool(request)
+
+        assert not result.success
+        assert "ParameterValidationError" in result.error
+        assert result.result is None
+
+    @pytest.mark.asyncio
+    async def test_ctp_middleware_success_unwrapped(self):
+        """When ToolManager wraps a successful CTP ToolExecutionResult,
+        the backend should unwrap and return the inner result."""
+
+        @dataclass
+        class CTPToolExecResult:
+            success: bool
+            result: Any
+            error: str | None
+            tool_name: str = ""
+            duration_ms: float = 0.0
+
+        ctp_ok = CTPToolExecResult(
+            success=True,
+            result={"lat": 51.95, "lon": 0.85},
+            error=None,
+            tool_name="geocode_location",
+            duration_ms=120,
+        )
+        tm = FakeToolManager(success=True, result=ctp_ok)
+        backend = McpToolBackend(tm, enable_guards=False)
+
+        request = ToolExecutionRequest(
+            tool_name="geocode_location",
+            args={"query": "Leavenheath"},
+            step_id="step-1",
+        )
+        result = await backend.execute_tool(request)
+
+        assert result.success
+        assert result.result == {"lat": 51.95, "lon": 0.85}
+        assert result.error is None
+
+
+# ── Tests: Guard Integration ───────────────────────────────────────────────
+
+
+class TestGuardIntegration:
+    """Test guard check/record integration in the backend."""
+
+    @pytest.mark.asyncio
+    async def test_guard_blocks_tool(self):
+        """When guards block, the tool is not executed."""
+        tm = FakeToolManager(result="should not see this")
+        backend = McpToolBackend(tm, enable_guards=True)
+
+        # Mock _check_guards to return a block
+        with patch(
+            "mcp_cli.planning.backends._check_guards",
+            return_value="Budget exhausted",
+        ):
+            request = ToolExecutionRequest(
+                tool_name="write_file",
+                args={"path": "/tmp/x", "content": "data"},
+                step_id="step-6",
+            )
+            result = await backend.execute_tool(request)
+
+        assert not result.success
+        assert "Guard blocked" in result.error
+        assert "Budget exhausted" in result.error
+        # Tool was never called
+        assert len(tm.calls) == 0
+
+    @pytest.mark.asyncio
+    async def test_guard_allows_tool(self):
+        """When guards allow, the tool executes normally."""
+        tm = FakeToolManager(result="success")
+        backend = McpToolBackend(tm, enable_guards=True)
+
+        with (
+            patch(
+                "mcp_cli.planning.backends._check_guards",
+                return_value=None,
+            ),
+            patch(
+                "mcp_cli.planning.backends._record_result",
+            ) as mock_record,
+        ):
+            request = ToolExecutionRequest(
+                tool_name="read_file",
+                args={"path": "/tmp/x"},
+                step_id="step-7",
+            )
+            result = await backend.execute_tool(request)
+
+        assert result.success
+        assert result.result == "success"
+        assert len(tm.calls) == 1
+        # Result was recorded
+        mock_record.assert_called_once_with("read_file", {"path": "/tmp/x"}, "success")
+
+    @pytest.mark.asyncio
+    async def test_guards_disabled(self):
+        """When enable_guards=False, no guard checks are performed."""
+        tm = FakeToolManager(result="ok")
+        backend = McpToolBackend(tm, enable_guards=False)
+
+        with patch(
+            "mcp_cli.planning.backends._check_guards",
+        ) as mock_check:
+            request = ToolExecutionRequest(
+                tool_name="read_file",
+                args={},
+                step_id="step-8",
+            )
+            result = await backend.execute_tool(request)
+
+        assert result.success
+        mock_check.assert_not_called()
+
+
+class TestCheckGuards:
+    """Test _check_guards helper."""
+
+    def test_no_session_manager(self):
+        """Returns None when chuk_ai_session_manager is not available."""
+        with patch.dict(
+            "sys.modules",
+            {"chuk_ai_session_manager": None, "chuk_ai_session_manager.guards": None},
+        ):
+            result = _check_guards("tool", {})
+        assert result is None
+
+    def test_guard_allows(self):
+        """Returns None when guards allow the tool."""
+        mock_state = MagicMock()
+        mock_state.check_per_tool_limit.return_value = MagicMock(blocked=False)
+        mock_state.check_all_guards.return_value = MagicMock(blocked=False)
+        mock_state.limits.per_tool_cap = 10
+
+        with patch(
+            "chuk_ai_session_manager.guards.get_tool_state",
+            return_value=mock_state,
+        ):
+            result = _check_guards("read_file", {"path": "/tmp/x"})
+        assert result is None
+
+    def test_guard_blocks(self):
+        """Returns error string when guards block the tool."""
+        mock_state = MagicMock()
+        mock_state.check_per_tool_limit.return_value = MagicMock(blocked=False)
+        mock_state.check_all_guards.return_value = MagicMock(
+            blocked=True, reason="Budget exhausted"
+        )
+        mock_state.limits.per_tool_cap = 10
+
+        with patch(
+            "chuk_ai_session_manager.guards.get_tool_state",
+            return_value=mock_state,
+        ):
+            result = _check_guards("write_file", {"path": "/tmp/x"})
+        assert result == "Budget exhausted"
+
+    def test_tool_state_none(self):
+        """Returns None when get_tool_state() returns None."""
+        with patch(
+            "chuk_ai_session_manager.guards.get_tool_state",
+            return_value=None,
+        ):
+            result = _check_guards("tool", {})
+        assert result is None
+
+
+class TestRecordResult:
+    """Test _record_result helper."""
+
+    def test_record_does_not_raise(self):
+        """Recording should never raise even if guards aren't available."""
+        # Should silently handle any error
+        _record_result("tool", {"arg": "val"}, "result")
+
+
+# ── Tests: Extract Result ──────────────────────────────────────────────────
+
+
+class TestExtractResult:
+    """Test _extract_result normalization."""
+
+    def test_none(self):
+        assert _extract_result(None) is None
+
+    def test_string(self):
+        assert _extract_result("hello") == "hello"
+
+    def test_dict(self):
+        assert _extract_result({"key": "val"}) == {"key": "val"}
+
+    def test_content_blocks_single(self):
+        blocks = [{"type": "text", "text": "result data"}]
+        assert _extract_result(blocks) == "result data"
+
+    def test_content_blocks_multiple(self):
+        blocks = [
+            {"type": "text", "text": "line 1"},
+            {"type": "text", "text": "line 2"},
+        ]
+        assert _extract_result(blocks) == "line 1\nline 2"
+
+    def test_content_blocks_mixed(self):
+        blocks = [
+            {"type": "image", "url": "http://example.com/img.png"},
+            {"type": "text", "text": "caption"},
+        ]
+        assert _extract_result(blocks) == "caption"
+
+    def test_list_of_strings(self):
+        assert _extract_result(["a", "b", "c"]) == "a\nb\nc"
+
+    def test_list_no_text(self):
+        blocks = [{"type": "image", "url": "http://example.com"}]
+        assert _extract_result(blocks) == blocks
+
+    def test_ctp_tool_execution_result_success(self):
+        """Unwrap CTP ToolExecutionResult with success=True."""
+
+        @dataclass
+        class CTPResult:
+            success: bool
+            result: Any
+            error: str | None
+
+        wrapper = CTPResult(success=True, result="actual data", error=None)
+        assert _extract_result(wrapper) == "actual data"
+
+    def test_ctp_tool_execution_result_failure(self):
+        """CTP ToolExecutionResult with success=False returns None."""
+
+        @dataclass
+        class CTPResult:
+            success: bool
+            result: Any
+            error: str | None
+
+        wrapper = CTPResult(success=False, result=None, error="bad args")
+        assert _extract_result(wrapper) is None
+
+    def test_ctp_tool_execution_result_nested(self):
+        """Unwrap nested CTP result with content blocks inside."""
+
+        @dataclass
+        class CTPResult:
+            success: bool
+            result: Any
+            error: str | None
+
+        inner = [{"type": "text", "text": "geocoded coords"}]
+        wrapper = CTPResult(success=True, result=inner, error=None)
+        assert _extract_result(wrapper) == "geocoded coords"
+
+    def test_mcp_dict_wrapper_with_tool_result(self):
+        """Extract text from MCP dict wrapper with ToolResult object.
+
+        This is the most common format from stream_manager.call_tool():
+        {"isError": False, "content": ToolResult(content=[{type, text}])}
+        """
+
+        class FakeToolResult:
+            def __init__(self, content):
+                self.content = content
+
+        tool_result = FakeToolResult(
+            content=[{"type": "text", "text": '{"lat": 51.95, "lon": 0.85}'}]
+        )
+        raw = {"isError": False, "content": tool_result}
+        result = _extract_result(raw)
+        # JSON string should be parsed into a dict
+        assert result == {"lat": 51.95, "lon": 0.85}
+
+    def test_mcp_dict_wrapper_with_list_content(self):
+        """Extract text from MCP dict wrapper with content as a list."""
+        raw = {
+            "isError": False,
+            "content": [{"type": "text", "text": "hello world"}],
+        }
+        assert _extract_result(raw) == "hello world"
+
+    def test_tool_result_object_directly(self):
+        """Extract text from a ToolResult object without dict wrapper."""
+
+        class FakeToolResult:
+            def __init__(self, content):
+                self.content = content
+
+        tool_result = FakeToolResult(content=[{"type": "text", "text": "data"}])
+        assert _extract_result(tool_result) == "data"
+
+    def test_tool_result_with_object_content_blocks(self):
+        """Extract text from ToolResult with object-type content blocks."""
+
+        class ContentBlock:
+            def __init__(self, type_, text):
+                self.type = type_
+                self.text = text
+
+        class FakeToolResult:
+            def __init__(self, content):
+                self.content = content
+
+        tool_result = FakeToolResult(
+            content=[ContentBlock("text", '{"results": [1, 2, 3]}')]
+        )
+        result = _extract_result(tool_result)
+        assert result == {"results": [1, 2, 3]}
+
+    def test_json_string_parsed_to_dict(self):
+        """JSON strings are parsed into dicts."""
+        raw = '{"temperature": 15.5, "unit": "celsius"}'
+        result = _extract_result(raw)
+        assert result == {"temperature": 15.5, "unit": "celsius"}
+
+    def test_json_string_parsed_to_list(self):
+        """JSON strings are parsed into lists."""
+        raw = "[1, 2, 3]"
+        result = _extract_result(raw)
+        assert result == [1, 2, 3]
+
+    def test_non_json_string_preserved(self):
+        """Non-JSON strings are returned as-is."""
+        assert _extract_result("plain text") == "plain text"
+
+    def test_integer_preserved(self):
+        """Integer values are returned as-is."""
+        assert _extract_result(42) == 42
+
+    def test_nested_mcp_ctp_wrapper(self):
+        """Handle CTP wrapping an MCP dict: CTP(result={isError, content: TR})."""
+
+        class FakeToolResult:
+            def __init__(self, content):
+                self.content = content
+
+        @dataclass
+        class CTPResult:
+            success: bool
+            result: Any
+            error: str | None
+
+        tool_result = FakeToolResult(
+            content=[{"type": "text", "text": '{"coords": [51.95, 0.85]}'}]
+        )
+        ctp = CTPResult(
+            success=True,
+            result={"isError": False, "content": tool_result},
+            error=None,
+        )
+        result = _extract_result(ctp)
+        assert result == {"coords": [51.95, 0.85]}
+
+
+# ── Tests: Content Block Helpers ──────────────────────────────────────────
+
+
+class TestExtractContentBlocks:
+    """Test _extract_content_blocks helper."""
+
+    def test_dict_blocks(self):
+        blocks = [{"type": "text", "text": "hello"}]
+        assert _extract_content_blocks(blocks) == "hello"
+
+    def test_object_blocks(self):
+        class Block:
+            def __init__(self, type_, text):
+                self.type = type_
+                self.text = text
+
+        blocks = [Block("text", "world")]
+        assert _extract_content_blocks(blocks) == "world"
+
+    def test_mixed_blocks(self):
+        blocks = [
+            {"type": "image", "url": "x"},
+            {"type": "text", "text": "caption"},
+        ]
+        assert _extract_content_blocks(blocks) == "caption"
+
+    def test_no_text_blocks(self):
+        blocks = [{"type": "image", "url": "x"}]
+        assert _extract_content_blocks(blocks) == blocks
+
+    def test_json_text_parsed(self):
+        blocks = [{"type": "text", "text": '{"key": "value"}'}]
+        assert _extract_content_blocks(blocks) == {"key": "value"}
+
+
+class TestTryParseJson:
+    """Test _try_parse_json helper."""
+
+    def test_valid_dict(self):
+        assert _try_parse_json('{"a": 1}') == {"a": 1}
+
+    def test_valid_list(self):
+        assert _try_parse_json("[1, 2]") == [1, 2]
+
+    def test_plain_string(self):
+        assert _try_parse_json("hello") == "hello"
+
+    def test_empty_string(self):
+        assert _try_parse_json("") == ""
+
+    def test_numeric_string(self):
+        assert _try_parse_json("42") == 42
+
+    def test_invalid_json(self):
+        assert _try_parse_json("{bad json") == "{bad json"
+
+
+# ── Tests: Is Error Result ────────────────────────────────────────────────
+
+
+class TestIsErrorResult:
+    """Test _is_error_result detection of various error formats."""
+
+    def test_none(self):
+        assert not _is_error_result(None)
+
+    def test_string(self):
+        assert not _is_error_result("hello")
+
+    def test_dict_with_is_error(self):
+        assert _is_error_result({"isError": True, "error": "bad"})
+
+    def test_dict_without_is_error(self):
+        assert not _is_error_result({"result": "ok"})
+
+    def test_object_with_is_error(self):
+
+        class Obj:
+            isError = True
+
+        assert _is_error_result(Obj())
+
+    def test_list_with_error_block(self):
+        blocks = [{"isError": True, "text": "error"}]
+        assert _is_error_result(blocks)
+
+    def test_list_without_error_block(self):
+        blocks = [{"type": "text", "text": "ok"}]
+        assert not _is_error_result(blocks)
+
+    def test_ctp_tool_execution_result_failure(self):
+        """Detect CTP ToolExecutionResult with success=False."""
+
+        @dataclass
+        class CTPResult:
+            success: bool
+            result: Any
+            error: str | None
+
+        wrapper = CTPResult(success=False, result=None, error="JSON-RPC Error")
+        assert _is_error_result(wrapper)
+
+    def test_ctp_tool_execution_result_success(self):
+        """CTP ToolExecutionResult with success=True is not an error."""
+
+        @dataclass
+        class CTPResult:
+            success: bool
+            result: Any
+            error: str | None
+
+        wrapper = CTPResult(success=True, result="data", error=None)
+        assert not _is_error_result(wrapper)
+
+    def test_dict_with_nested_is_error(self):
+        """Detect error in nested content ToolResult with isError=True."""
+
+        class FakeToolResult:
+            isError = True
+
+        raw = {"content": FakeToolResult()}
+        assert _is_error_result(raw)
+
+    def test_dict_with_nested_no_error(self):
+        """Dict with content that has isError=False is not an error."""
+
+        class FakeToolResult:
+            isError = False
+
+        raw = {"content": FakeToolResult()}
+        assert not _is_error_result(raw)
+
+
+# ── Tests: Extract Error Message ──────────────────────────────────────────
+
+
+class TestExtractErrorMessage:
+    """Test _extract_error_message helper."""
+
+    def test_none(self):
+        assert _extract_error_message(None) is None
+
+    def test_content_blocks(self):
+        blocks = [{"type": "text", "text": "error details"}]
+        assert _extract_error_message(blocks) == "error details"
+
+    def test_ctp_result_with_error(self):
+        """Extract error from CTP ToolExecutionResult."""
+
+        @dataclass
+        class CTPResult:
+            success: bool
+            result: Any
+            error: str | None
+
+        wrapper = CTPResult(
+            success=False, result=None, error="ParameterValidationError: bad args"
+        )
+        assert _extract_error_message(wrapper) == "ParameterValidationError: bad args"
+
+    def test_long_string_truncated(self):
+        long_text = "x" * 300
+        result = _extract_error_message(long_text)
+        assert len(result) < 300
+        assert result.endswith("...")
diff --git a/tests/planning/test_context.py b/tests/planning/test_context.py
new file mode 100644
index 00000000..c3462755
--- /dev/null
+++ b/tests/planning/test_context.py
@@ -0,0 +1,524 @@
+# tests/planning/test_context.py
+"""Tests for PlanningContext — state container with PlanRegistry round-trips."""
+
+from __future__ import annotations
+
+from typing import Any
+from dataclasses import dataclass
+
+import pytest
+
+from mcp_cli.planning.context import PlanningContext
+
+
+# ── Helpers ────────────────────────────────────────────────────────────────
+
+
+@dataclass
+class FakeToolInfo:
+    """Minimal ToolInfo stub."""
+
+    name: str
+
+
+class FakeToolManager:
+    """Minimal ToolManager stub for PlanningContext tests."""
+
+    def __init__(self, tool_names: list[str] | None = None):
+
self._tool_names = tool_names or ["read_file", "write_file", "search_code"] + + async def get_all_tools(self) -> list[FakeToolInfo]: + return [FakeToolInfo(name=n) for n in self._tool_names] + + async def get_adapted_tools_for_llm(self, provider: str) -> list[dict[str, Any]]: + return [ + {"type": "function", "function": {"name": n, "description": f"Tool: {n}"}} + for n in self._tool_names + ] + + +SAMPLE_PLAN_DICT = { + "title": "Test Plan", + "description": "A test plan for round-trip verification", + "tags": ["test"], + "variables": {"base_path": "/tmp"}, + "steps": [ + { + "title": "Read file", + "tool": "read_file", + "args": {"path": "/tmp/test.py"}, + "depends_on": [], + "result_variable": "file_content", + }, + { + "title": "Search code", + "tool": "search_code", + "args": {"query": "def main"}, + "depends_on": [0], + "result_variable": "search_results", + }, + ], +} + + +# ── Tests: Initialization ─────────────────────────────────────────────────── + + +class TestPlanningContextInit: + """Test PlanningContext initialization.""" + + def test_default_plans_dir(self, tmp_path): + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + assert ctx.plans_dir.exists() + + def test_graph_store_created(self, tmp_path): + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + assert ctx.graph_store is not None + + def test_plan_registry_created(self, tmp_path): + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + assert ctx.plan_registry is not None + + +# ── Tests: Tool Catalog ───────────────────────────────────────────────────── + + +class TestPlanningContextToolCatalog: + """Test tool catalog methods.""" + + @pytest.mark.asyncio + async def test_get_tool_names(self, tmp_path): + tm = FakeToolManager(["alpha", "beta", "gamma"]) + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + names = await ctx.get_tool_names() + assert names == ["alpha", "beta", "gamma"] + + 
@pytest.mark.asyncio + async def test_get_tool_catalog(self, tmp_path): + tm = FakeToolManager(["read_file"]) + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + catalog = await ctx.get_tool_catalog() + assert len(catalog) == 1 + assert catalog[0]["function"]["name"] == "read_file" + + @pytest.mark.asyncio + async def test_get_tool_names_handles_error(self, tmp_path): + class BrokenToolManager: + async def get_all_tools(self): + raise RuntimeError("boom") + + ctx = PlanningContext(BrokenToolManager(), plans_dir=tmp_path / "plans") + assert await ctx.get_tool_names() == [] + + +# ── Tests: Plan CRUD ──────────────────────────────────────────────────────── + + +class TestPlanningContextPlanCrud: + """Test plan CRUD with real PlanRegistry round-trips.""" + + @pytest.mark.asyncio + async def test_list_plans_empty(self, tmp_path): + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + plans = await ctx.list_plans() + assert plans == [] + + @pytest.mark.asyncio + async def test_get_plan_not_found(self, tmp_path): + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + plan = await ctx.get_plan("nonexistent-id") + assert plan is None + + @pytest.mark.asyncio + async def test_delete_plan_not_found(self, tmp_path): + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + assert await ctx.delete_plan("nonexistent-id") is False + + +# ── Tests: PlanRegistry Round-Trip ─────────────────────────────────────────── + + +class TestPlanRegistryRoundTrip: + """Verify that plans survive save → disk → load cycle.""" + + @pytest.mark.asyncio + async def test_save_and_load_plan(self, tmp_path): + """Save a plan from dict, then load it back and verify contents.""" + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + # Save + plan_id = await ctx.save_plan_from_dict(SAMPLE_PLAN_DICT) + assert plan_id is not None + assert len(plan_id) > 0 + + # Load back as dict + 
loaded = await ctx.get_plan(plan_id) + assert loaded is not None + assert loaded["title"] == "Test Plan" + assert len(loaded["steps"]) == 2 + + @pytest.mark.asyncio + async def test_save_and_list_plans(self, tmp_path): + """Save a plan and verify it appears in the list.""" + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_id = await ctx.save_plan_from_dict(SAMPLE_PLAN_DICT) + + plans = await ctx.list_plans() + assert len(plans) >= 1 + + plan_ids = [p.get("id") for p in plans] + assert plan_id in plan_ids + + @pytest.mark.asyncio + async def test_save_and_delete_plan(self, tmp_path): + """Save a plan, then delete it, verify it's gone.""" + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_id = await ctx.save_plan_from_dict(SAMPLE_PLAN_DICT) + + # Delete + assert await ctx.delete_plan(plan_id) is True + + # Verify gone + loaded = await ctx.get_plan(plan_id) + assert loaded is None + + @pytest.mark.asyncio + async def test_plan_persists_to_disk(self, tmp_path): + """Plan JSON file should exist on disk after save.""" + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_id = await ctx.save_plan_from_dict(SAMPLE_PLAN_DICT) + + json_path = tmp_path / "plans" / f"{plan_id}.json" + assert json_path.exists() + + # Verify it's valid JSON + import json + + data = json.loads(json_path.read_text()) + assert data["title"] == "Test Plan" + + @pytest.mark.asyncio + async def test_fresh_context_loads_from_disk(self, tmp_path): + """A new PlanningContext should discover plans saved by a previous one.""" + tm = FakeToolManager() + + # First context: save a plan + ctx1 = PlanningContext(tm, plans_dir=tmp_path / "plans") + plan_id = await ctx1.save_plan_from_dict(SAMPLE_PLAN_DICT) + + # Second context: should find the plan on disk + ctx2 = PlanningContext(tm, plans_dir=tmp_path / "plans") + loaded = await ctx2.get_plan(plan_id) + assert loaded is not None + assert 
loaded["title"] == "Test Plan" + + @pytest.mark.asyncio + async def test_get_plan_object(self, tmp_path): + """get_plan_object returns a UniversalPlan, not a dict.""" + from chuk_ai_planner.core.planner.universal_plan import UniversalPlan + + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_id = await ctx.save_plan_from_dict(SAMPLE_PLAN_DICT) + + plan_obj = await ctx.get_plan_object(plan_id) + assert plan_obj is not None + assert isinstance(plan_obj, UniversalPlan) + assert plan_obj.title == "Test Plan" + + +# ── Tests: Prefix ID Resolution ──────────────────────────────────────────── + + +class TestPlanPrefixResolution: + """Test that get_plan, get_plan_object, and delete_plan support prefix IDs.""" + + @pytest.mark.asyncio + async def test_get_plan_by_prefix(self, tmp_path): + """get_plan should resolve a unique prefix to the full plan ID.""" + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_id = await ctx.save_plan_from_dict(SAMPLE_PLAN_DICT) + + # Use first 8 chars as prefix (like the UI shows) + prefix = plan_id[:8] + loaded = await ctx.get_plan(prefix) + assert loaded is not None + assert loaded["title"] == "Test Plan" + + @pytest.mark.asyncio + async def test_get_plan_object_by_prefix(self, tmp_path): + """get_plan_object should resolve a unique prefix.""" + from chuk_ai_planner.core.planner.universal_plan import UniversalPlan + + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_id = await ctx.save_plan_from_dict(SAMPLE_PLAN_DICT) + prefix = plan_id[:8] + + plan_obj = await ctx.get_plan_object(prefix) + assert plan_obj is not None + assert isinstance(plan_obj, UniversalPlan) + + @pytest.mark.asyncio + async def test_delete_plan_by_prefix(self, tmp_path): + """delete_plan should resolve a unique prefix.""" + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_id = await 
ctx.save_plan_from_dict(SAMPLE_PLAN_DICT) + prefix = plan_id[:8] + + assert await ctx.delete_plan(prefix) is True + assert await ctx.get_plan(plan_id) is None + + @pytest.mark.asyncio + async def test_full_id_still_works(self, tmp_path): + """Full plan IDs should still work as before.""" + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_id = await ctx.save_plan_from_dict(SAMPLE_PLAN_DICT) + loaded = await ctx.get_plan(plan_id) + assert loaded is not None + + @pytest.mark.asyncio + async def test_nonexistent_prefix_returns_none(self, tmp_path): + """A prefix that matches nothing should return None.""" + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + loaded = await ctx.get_plan("zzzzzzz") + assert loaded is None + + +# ── Tests: Build Plan from Dict ───────────────────────────────────────────── + + +class TestBuildPlanFromDict: + """Test _build_plan_from_dict with different input formats.""" + + @pytest.mark.asyncio + async def test_build_with_tool_field(self, tmp_path): + """Plans with 'tool' field (from PlanAgent) should build correctly.""" + from chuk_ai_planner.core.store.memory import InMemoryGraphStore + + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_dict = { + "title": "Simple Plan", + "steps": [ + {"title": "Step 1", "tool": "read_file", "args": {"path": "/tmp/x"}}, + ], + } + + graph = InMemoryGraphStore() + plan = await ctx._build_plan_from_dict(plan_dict, graph) + assert plan.title == "Simple Plan" + + @pytest.mark.asyncio + async def test_build_with_tool_calls_field(self, tmp_path): + """Plans with 'tool_calls' field (from registry) should build correctly.""" + from chuk_ai_planner.core.store.memory import InMemoryGraphStore + + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_dict = { + "title": "TC Plan", + "steps": [ + { + "title": "Step 1", + "tool_calls": [ + {"id": "tc-1", "name": 
"read_file", "args": {"path": "/tmp/x"}} + ], + }, + ], + } + + graph = InMemoryGraphStore() + plan = await ctx._build_plan_from_dict(plan_dict, graph) + assert plan.title == "TC Plan" + + @pytest.mark.asyncio + async def test_build_with_variables(self, tmp_path): + """Plan variables should be set on the UniversalPlan.""" + from chuk_ai_planner.core.store.memory import InMemoryGraphStore + + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_dict = { + "title": "Var Plan", + "variables": {"base_url": "http://localhost"}, + "steps": [ + {"title": "Fetch", "tool": "fetch", "args": {"url": "${base_url}"}}, + ], + } + + graph = InMemoryGraphStore() + plan = await ctx._build_plan_from_dict(plan_dict, graph) + assert plan.variables.get("base_url") == "http://localhost" + + +# ── Tests: Dependency Preservation ──────────────────────────────────────── + + +class TestDependencyPreservation: + """Test that depends_on survives the save → disk → load cycle. + + UniversalPlan.to_dict() drops depends_on fields. PlanningContext + patches the saved JSON to preserve them, and get_plan reads from + disk directly to avoid the lossy to_dict() path. + + The LLM generates 0-based depends_on but PlanRegistry assigns + 1-based string indices. _patch_saved_plan converts 0→"1", 1→"2", etc. 
+ """ + + @pytest.mark.asyncio + async def test_depends_on_preserved_on_disk(self, tmp_path): + """After save_plan_from_dict, the JSON file should contain depends_on + with 0-based refs converted to match saved step indices.""" + import json + + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_dict = { + "title": "Dependency Plan", + "steps": [ + { + "title": "Step A", + "tool": "read_file", + "args": {"path": "/a"}, + "depends_on": [], + "result_variable": "a_result", + }, + { + "title": "Step B (depends on A)", + "tool": "search_code", + "args": {"query": "test"}, + "depends_on": [0], + "result_variable": "b_result", + }, + ], + } + + plan_id = await ctx.save_plan_from_dict(plan_dict) + + # Read the JSON file directly + json_path = tmp_path / "plans" / f"{plan_id}.json" + data = json.loads(json_path.read_text()) + + # Step 0 should have empty depends_on + assert data["steps"][0].get("depends_on") == [] + # Step 1 should depend on step 1 (0-based 0 → 1-based "1") + step1_deps = data["steps"][1].get("depends_on") + assert step1_deps == ["1"] + + @pytest.mark.asyncio + async def test_depends_on_survives_load(self, tmp_path): + """get_plan should return plan dicts with converted depends_on intact.""" + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + plan_dict = { + "title": "Chain Plan", + "steps": [ + { + "title": "First", + "tool": "read_file", + "args": {}, + "depends_on": [], + }, + { + "title": "Second", + "tool": "write_file", + "args": {}, + "depends_on": [0], + }, + { + "title": "Third", + "tool": "search_code", + "args": {}, + "depends_on": [0, 1], + }, + ], + } + + plan_id = await ctx.save_plan_from_dict(plan_dict) + + # Load through get_plan (should read from disk, preserving depends_on) + loaded = await ctx.get_plan(plan_id) + assert loaded is not None + assert loaded["steps"][0].get("depends_on") == [] + # 0-based [0] → 1-based ["1"] + assert 
loaded["steps"][1].get("depends_on") == ["1"] + # 0-based [0, 1] → 1-based ["1", "2"] + assert loaded["steps"][2].get("depends_on") == ["1", "2"] + + @pytest.mark.asyncio + async def test_depends_on_survives_fresh_context(self, tmp_path): + """A new PlanningContext should load depends_on from disk.""" + tm = FakeToolManager() + + # Save with first context + ctx1 = PlanningContext(tm, plans_dir=tmp_path / "plans") + plan_dict = { + "title": "Cross-session Plan", + "steps": [ + {"title": "A", "tool": "read_file", "args": {}, "depends_on": []}, + {"title": "B", "tool": "write_file", "args": {}, "depends_on": [0]}, + ], + } + plan_id = await ctx1.save_plan_from_dict(plan_dict) + + # Load with fresh context (simulating new session) + ctx2 = PlanningContext(tm, plans_dir=tmp_path / "plans") + loaded = await ctx2.get_plan(plan_id) + assert loaded is not None + # 0-based [0] → 1-based ["1"] + assert loaded["steps"][1].get("depends_on") == ["1"] + + @pytest.mark.asyncio + async def test_state_files_excluded_from_plan_ids(self, tmp_path): + """Checkpoint _state.json files should not appear as plans.""" + import json + + tm = FakeToolManager() + ctx = PlanningContext(tm, plans_dir=tmp_path / "plans") + + # Save a real plan + plan_id = await ctx.save_plan_from_dict(SAMPLE_PLAN_DICT) + + # Create a fake state file (like PlanRunner saves) + state_path = tmp_path / "plans" / f"{plan_id}_state.json" + state_path.write_text(json.dumps({"completed_steps": [0]})) + + # _plan_ids should only return the real plan + ids = ctx._plan_ids() + assert plan_id in ids + assert f"{plan_id}_state" not in ids + + # list_plans should not include the state file + plans = await ctx.list_plans() + plan_ids = [p.get("id") for p in plans] + assert plan_id in plan_ids + assert f"{plan_id}_state" not in plan_ids diff --git a/tests/planning/test_executor.py b/tests/planning/test_executor.py new file mode 100644 index 00000000..54f61ba2 --- /dev/null +++ b/tests/planning/test_executor.py @@ -0,0 +1,1619 @@ 
+# tests/planning/test_executor.py +"""Tests for PlanRunner — plan execution with parallel batches, guards, DAG viz, +checkpoints, variable resolution, and agentic LLM-driven execution.""" + +from __future__ import annotations + +import asyncio +import json +from typing import Any +from dataclasses import dataclass +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from mcp_cli.planning.executor import ( + PlanRunner, + PlanExecutionResult, + ModelManagerProtocol, + render_plan_dag, + _maybe_await, + _serialize_variables, + _summarize_variables, + _compute_batches, + _resolve_variables, + _resolve_value, + _extract_tool_call, + _parse_tool_call_entry, +) +from mcp_cli.planning.context import PlanningContext + + +# ── Helpers ────────────────────────────────────────────────────────────────── + + +@dataclass +class FakeToolCallResult: + tool_name: str + success: bool = True + result: Any = "mock result" + error: str | None = None + + +class FakeToolInfo: + def __init__(self, name): + self.name = name + + +class FakeToolManager: + """Minimal ToolManager stub.""" + + def __init__(self, results: dict[str, Any] | None = None, *, delay: float = 0): + self._results = results or {} + self._delay = delay + self.calls: list[tuple[str, dict]] = [] + + def get_all_tools(self): + return [FakeToolInfo(n) for n in self._results.keys()] if self._results else [] + + async def execute_tool(self, tool_name, arguments, namespace=None, timeout=None): + if self._delay > 0: + await asyncio.sleep(self._delay) + self.calls.append((tool_name, arguments)) + result = self._results.get(tool_name, "default result") + if isinstance(result, Exception): + return FakeToolCallResult( + tool_name=tool_name, success=False, error=str(result) + ) + return FakeToolCallResult(tool_name=tool_name, result=result) + + +class FailingToolManager(FakeToolManager): + """ToolManager that fails on specific tools.""" + + def __init__(self, fail_tools: set[str], results: dict[str, Any] | None = 
None): + super().__init__(results or {}) + self._fail_tools = fail_tools + + async def execute_tool(self, tool_name, arguments, namespace=None, timeout=None): + self.calls.append((tool_name, arguments)) + if tool_name in self._fail_tools: + return FakeToolCallResult( + tool_name=tool_name, success=False, error=f"{tool_name} failed" + ) + result = self._results.get(tool_name, "default result") + return FakeToolCallResult(tool_name=tool_name, result=result) + + +SAMPLE_PLAN = { + "id": "test-plan-001", + "title": "Test Plan", + "steps": [ + { + "index": "1", + "title": "Read file", + "tool_calls": [ + {"id": "tc-1", "name": "read_file", "args": {"path": "test.py"}} + ], + "depends_on": [], + "result_variable": "file_content", + }, + { + "index": "2", + "title": "Search code", + "tool_calls": [ + {"id": "tc-2", "name": "search_code", "args": {"query": "def main"}} + ], + "depends_on": ["1"], + "result_variable": "search_results", + }, + ], + "variables": {}, +} + +PARALLEL_PLAN = { + "id": "test-plan-parallel", + "title": "Parallel Plan", + "steps": [ + { + "index": "1", + "title": "Read file A", + "tool_calls": [ + {"id": "tc-1", "name": "read_file", "args": {"path": "a.py"}} + ], + "depends_on": [], + "result_variable": "file_a", + }, + { + "index": "2", + "title": "Read file B", + "tool_calls": [ + {"id": "tc-2", "name": "read_file", "args": {"path": "b.py"}} + ], + "depends_on": [], + "result_variable": "file_b", + }, + { + "index": "3", + "title": "Merge results", + "tool_calls": [{"id": "tc-3", "name": "merge", "args": {}}], + "depends_on": ["1", "2"], + "result_variable": "merged", + }, + ], + "variables": {}, +} + +DIAMOND_PLAN = { + "id": "test-plan-diamond", + "title": "Diamond Plan", + "steps": [ + { + "index": "1", + "title": "Init", + "tool_calls": [{"id": "tc-1", "name": "init", "args": {}}], + "depends_on": [], + "result_variable": "init_result", + }, + { + "index": "2", + "title": "Branch A", + "tool_calls": [{"id": "tc-2", "name": "branch_a", "args": 
{}}], + "depends_on": ["1"], + "result_variable": "branch_a_result", + }, + { + "index": "3", + "title": "Branch B", + "tool_calls": [{"id": "tc-3", "name": "branch_b", "args": {}}], + "depends_on": ["1"], + "result_variable": "branch_b_result", + }, + { + "index": "4", + "title": "Branch C", + "tool_calls": [{"id": "tc-4", "name": "branch_c", "args": {}}], + "depends_on": ["1"], + "result_variable": "branch_c_result", + }, + { + "index": "5", + "title": "Join", + "tool_calls": [{"id": "tc-5", "name": "join", "args": {}}], + "depends_on": ["2", "3", "4"], + "result_variable": "join_result", + }, + ], + "variables": {}, +} + +VARS_PLAN = { + "id": "test-plan-vars", + "title": "Variable Plan", + "variables": {"base_url": "http://localhost:8080"}, + "steps": [ + { + "index": "1", + "title": "Fetch users", + "tool_calls": [ + {"id": "tc-1", "name": "fetch", "args": {"url": "${base_url}/users"}} + ], + "depends_on": [], + "result_variable": "users", + }, + { + "index": "2", + "title": "Process users", + "tool_calls": [ + {"id": "tc-2", "name": "process", "args": {"data": "${users}"}} + ], + "depends_on": ["1"], + "result_variable": "processed", + }, + ], +} + + +# ── Tests: Dry Run ─────────────────────────────────────────────────────────── + + +class TestPlanRunnerDryRun: + """Test dry-run mode — trace without executing.""" + + @pytest.mark.asyncio + async def test_dry_run_returns_all_steps(self, tmp_path): + tm = FakeToolManager() + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + runner = PlanRunner(context, enable_guards=False) + + result = await runner.execute_plan(SAMPLE_PLAN, dry_run=True) + + assert result.success + assert result.plan_id == "test-plan-001" + assert result.plan_title == "Test Plan" + assert len(result.steps) == 2 + assert result.steps[0].step_title == "Read file" + assert result.steps[0].tool_name == "read_file" + assert result.steps[1].step_title == "Search code" + + @pytest.mark.asyncio + async def 
test_dry_run_marks_not_executed(self, tmp_path): + tm = FakeToolManager() + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + runner = PlanRunner(context, enable_guards=False) + + result = await runner.execute_plan(SAMPLE_PLAN, dry_run=True) + + for step in result.steps: + assert step.result == "[dry-run: not executed]" + + @pytest.mark.asyncio + async def test_dry_run_simulates_variables(self, tmp_path): + """Dry run should simulate variable binding.""" + tm = FakeToolManager() + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + runner = PlanRunner(context, enable_guards=False) + + result = await runner.execute_plan(SAMPLE_PLAN, dry_run=True) + + assert "file_content" in result.variables + assert "search_results" in result.variables + + @pytest.mark.asyncio + async def test_dry_run_callbacks(self, tmp_path): + tm = FakeToolManager() + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + started = [] + completed = [] + + runner = PlanRunner( + context, + on_step_start=lambda i, t, tn: started.append((i, t, tn)), + on_step_complete=lambda sr: completed.append(sr.step_title), + enable_guards=False, + ) + + await runner.execute_plan(SAMPLE_PLAN, dry_run=True) + + assert len(started) == 2 + assert started[0] == ("1", "Read file", "read_file") + assert len(completed) == 2 + + +# ── Tests: Live Execution ────────────────────────────────────────────────── + + +class TestPlanRunnerExecution: + """Test live plan execution with the new parallel batch engine.""" + + @pytest.mark.asyncio + async def test_linear_execution(self, tmp_path): + """Sequential plan executes all steps in order.""" + tm = FakeToolManager({"read_file": "file data", "search_code": "found main"}) + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + runner = PlanRunner(context, enable_guards=False) + + result = await runner.execute_plan(SAMPLE_PLAN, checkpoint=False) + + assert result.success + assert len(result.steps) == 2 + assert result.steps[0].success + 
assert result.steps[0].tool_name == "read_file" + assert result.steps[1].success + assert result.steps[1].tool_name == "search_code" + assert result.total_duration > 0 + + @pytest.mark.asyncio + async def test_variable_binding(self, tmp_path): + """Result variables are stored and available to later steps.""" + tm = FakeToolManager( + {"read_file": "file contents", "search_code": "search hits"} + ) + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + runner = PlanRunner(context, enable_guards=False) + + result = await runner.execute_plan(SAMPLE_PLAN, checkpoint=False) + + assert result.variables.get("file_content") == "file contents" + assert result.variables.get("search_results") == "search hits" + + @pytest.mark.asyncio + async def test_parallel_execution(self, tmp_path): + """Independent steps run in the same batch.""" + tm = FakeToolManager( + {"read_file": "data", "merge": "merged"}, + delay=0.01, # Small delay to verify concurrency + ) + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + runner = PlanRunner(context, enable_guards=False) + + result = await runner.execute_plan(PARALLEL_PLAN, checkpoint=False) + + assert result.success + assert len(result.steps) == 3 + # Steps 1 and 2 should have been in the same batch + assert result.steps[0].success + assert result.steps[1].success + assert result.steps[2].success + + @pytest.mark.asyncio + async def test_diamond_execution(self, tmp_path): + """Diamond DAG (1 → 2,3,4 → 5) executes correctly.""" + tm = FakeToolManager( + { + "init": "initialized", + "branch_a": "A", + "branch_b": "B", + "branch_c": "C", + "join": "joined", + } + ) + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + runner = PlanRunner(context, enable_guards=False) + + result = await runner.execute_plan(DIAMOND_PLAN, checkpoint=False) + + assert result.success + assert len(result.steps) == 5 + # All branches should have completed + assert result.variables.get("init_result") == "initialized" + assert 
result.variables.get("branch_a_result") == "A" + assert result.variables.get("branch_b_result") == "B" + assert result.variables.get("branch_c_result") == "C" + assert result.variables.get("join_result") == "joined" + + @pytest.mark.asyncio + async def test_execution_with_variables(self, tmp_path): + """Variable overrides are passed through.""" + tm = FakeToolManager({"fetch": "[user1, user2]", "process": "done"}) + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + runner = PlanRunner(context, enable_guards=False) + + result = await runner.execute_plan( + VARS_PLAN, + variables={"base_url": "http://api.example.com"}, + checkpoint=False, + ) + + assert result.success + # The overridden base_url should have been used + assert result.variables.get("base_url") == "http://api.example.com" + + @pytest.mark.asyncio + async def test_step_failure_stops_execution(self, tmp_path): + """When a step fails, execution stops and error is reported.""" + tm = FailingToolManager({"search_code"}, {"read_file": "data"}) + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + runner = PlanRunner(context, enable_guards=False) + + result = await runner.execute_plan(SAMPLE_PLAN, checkpoint=False) + + assert not result.success + assert "search_code failed" in result.error + + @pytest.mark.asyncio + async def test_empty_plan(self, tmp_path): + """Empty plan succeeds immediately.""" + tm = FakeToolManager() + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + runner = PlanRunner(context, enable_guards=False) + + result = await runner.execute_plan( + {"id": "empty", "title": "Empty", "steps": []}, + checkpoint=False, + ) + + assert result.success + assert len(result.steps) == 0 + + @pytest.mark.asyncio + async def test_execution_callbacks(self, tmp_path): + """Callbacks fire for each step during live execution.""" + tm = FakeToolManager({"read_file": "data", "search_code": "found"}) + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + started = [] + 
+        completed = []
+
+        runner = PlanRunner(
+            context,
+            on_step_start=lambda i, t, tn: started.append((i, t, tn)),
+            on_step_complete=lambda sr: completed.append(sr.step_title),
+            enable_guards=False,
+        )
+
+        await runner.execute_plan(SAMPLE_PLAN, checkpoint=False)
+
+        assert len(started) == 2
+        assert len(completed) == 2
+        assert started[0] == ("1", "Read file", "read_file")
+
+    @pytest.mark.asyncio
+    async def test_checkpoint_after_execution(self, tmp_path):
+        """Execution checkpoints are saved after each batch."""
+        tm = FakeToolManager({"read_file": "data", "search_code": "found"})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+        runner = PlanRunner(context, enable_guards=False)
+
+        result = await runner.execute_plan(SAMPLE_PLAN, checkpoint=True)
+
+        assert result.success
+        checkpoint_path = context.plans_dir / "test-plan-001_state.json"
+        assert checkpoint_path.exists()
+
+        data = json.loads(checkpoint_path.read_text())
+        assert data["status"] == "completed"
+        assert "1" in data["completed_steps"]
+        assert "2" in data["completed_steps"]
+
+    @pytest.mark.asyncio
+    async def test_tool_field_fallback(self, tmp_path):
+        """Steps with 'tool' field (not 'tool_calls') work correctly."""
+        tm = FakeToolManager({"my_tool": "result"})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+        runner = PlanRunner(context, enable_guards=False)
+
+        plan = {
+            "id": "tool-field",
+            "title": "Tool Field Plan",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Do thing",
+                    "tool": "my_tool",
+                    "args": {"x": 1},
+                },
+            ],
+        }
+
+        result = await runner.execute_plan(plan, checkpoint=False)
+        assert result.success
+        assert result.steps[0].tool_name == "my_tool"
+
+
+# ── Tests: Topological Batching ────────────────────────────────────────────
+
+
+class TestComputeBatches:
+    """Test _compute_batches topological sort."""
+
+    def test_empty_steps(self):
+        assert _compute_batches([]) == []
+
+    def test_single_step(self):
+        steps = [{"index": "1", "title": "A"}]
+        batches = _compute_batches(steps)
+        assert len(batches) == 1
+        assert len(batches[0]) == 1
+
+    def test_linear_chain(self):
+        """A → B → C produces 3 batches of 1."""
+        steps = [
+            {"index": "1", "title": "A"},
+            {"index": "2", "title": "B", "depends_on": ["1"]},
+            {"index": "3", "title": "C", "depends_on": ["2"]},
+        ]
+        batches = _compute_batches(steps)
+        assert len(batches) == 3
+        assert len(batches[0]) == 1
+        assert len(batches[1]) == 1
+        assert len(batches[2]) == 1
+
+    def test_parallel_steps(self):
+        """Two independent steps produce 1 batch of 2."""
+        steps = [
+            {"index": "1", "title": "A"},
+            {"index": "2", "title": "B"},
+        ]
+        batches = _compute_batches(steps)
+        assert len(batches) == 1
+        assert len(batches[0]) == 2
+
+    def test_diamond_dag(self):
+        """Diamond: 1 → (2,3) → 4 produces 3 batches."""
+        steps = [
+            {"index": "1", "title": "Root"},
+            {"index": "2", "title": "Left", "depends_on": ["1"]},
+            {"index": "3", "title": "Right", "depends_on": ["1"]},
+            {"index": "4", "title": "Join", "depends_on": ["2", "3"]},
+        ]
+        batches = _compute_batches(steps)
+        assert len(batches) == 3
+        assert len(batches[0]) == 1  # Root
+        assert len(batches[1]) == 2  # Left, Right (parallel)
+        assert len(batches[2]) == 1  # Join
+
+    def test_wide_dag(self):
+        """5 independent roots + 1 join = 2 batches."""
+        steps = [
+            {"index": "1", "title": "A"},
+            {"index": "2", "title": "B"},
+            {"index": "3", "title": "C"},
+            {"index": "4", "title": "D"},
+            {"index": "5", "title": "E"},
+            {"index": "6", "title": "Join", "depends_on": ["1", "2", "3", "4", "5"]},
+        ]
+        batches = _compute_batches(steps)
+        assert len(batches) == 2
+        assert len(batches[0]) == 5  # All roots parallel
+        assert len(batches[1]) == 1  # Join
+
+    def test_missing_dependency_ignored(self):
+        """Dependencies on non-existent steps are ignored."""
+        steps = [
+            {"index": "1", "title": "A", "depends_on": ["99"]},
+        ]
+        batches = _compute_batches(steps)
+        assert len(batches) == 1
+
+    def test_auto_assigns_index(self):
+        """Steps without explicit index get auto-assigned."""
+        steps = [
+            {"title": "A"},
+            {"title": "B"},
+        ]
+        batches = _compute_batches(steps)
+        assert len(batches) == 1
+        assert len(batches[0]) == 2
+
+
+# ── Tests: Variable Resolution ─────────────────────────────────────────────
+
+
+class TestVariableResolution:
+    """Test ${var} resolution in tool arguments."""
+
+    def test_no_variables(self):
+        result = _resolve_variables({"path": "/tmp/test.py"}, {})
+        assert result == {"path": "/tmp/test.py"}
+
+    def test_simple_variable(self):
+        result = _resolve_variables(
+            {"path": "${target_path}"},
+            {"target_path": "/tmp/test.py"},
+        )
+        assert result == {"path": "/tmp/test.py"}
+
+    def test_template_string(self):
+        result = _resolve_variables(
+            {"url": "${base}/api/${version}"},
+            {"base": "http://localhost", "version": "v2"},
+        )
+        assert result == {"url": "http://localhost/api/v2"}
+
+    def test_nested_path(self):
+        result = _resolve_variables(
+            {"port": "${config.server.port}"},
+            {"config": {"server": {"port": 8080}}},
+        )
+        assert result == {"port": 8080}
+
+    def test_unresolved_variable_preserved(self):
+        result = _resolve_variables(
+            {"x": "${missing}"},
+            {},
+        )
+        assert result == {"x": "${missing}"}
+
+    def test_preserves_type(self):
+        """Single ${var} reference preserves the original type (not stringified)."""
+        result = _resolve_variables(
+            {"count": "${n}"},
+            {"n": 42},
+        )
+        assert result == {"count": 42}
+
+    def test_list_values(self):
+        result = _resolve_variables(
+            {"items": ["${a}", "${b}"]},
+            {"a": "alpha", "b": "beta"},
+        )
+        assert result == {"items": ["alpha", "beta"]}
+
+    def test_nested_dict(self):
+        result = _resolve_variables(
+            {"opts": {"key": "${val}"}},
+            {"val": "resolved"},
+        )
+        assert result == {"opts": {"key": "resolved"}}
+
+    def test_non_string_passthrough(self):
+        result = _resolve_value(42, {"x": 1})
+        assert result == 42
+
+    def test_none_passthrough(self):
+        result = _resolve_value(None, {})
+        assert result is None
+
+    def test_bool_passthrough(self):
+        result = _resolve_value(True, {})
+        assert result is True
+
+
+# ── Tests: Checkpointing ────────────────────────────────────────────────────
+
+
+class TestPlanRunnerCheckpoint:
+    """Test execution checkpointing."""
+
+    @pytest.mark.asyncio
+    async def test_checkpoint_saved(self, tmp_path):
+        tm = FakeToolManager()
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+        runner = PlanRunner(context, enable_guards=False)
+
+        runner._save_checkpoint(
+            "test-plan-001",
+            completed_steps=["1", "2"],
+            variables={"file_content": "hello"},
+            status="completed",
+        )
+
+        checkpoint_path = context.plans_dir / "test-plan-001_state.json"
+        assert checkpoint_path.exists()
+
+        data = json.loads(checkpoint_path.read_text())
+        assert data["plan_id"] == "test-plan-001"
+        assert data["status"] == "completed"
+        assert data["completed_steps"] == ["1", "2"]
+
+    @pytest.mark.asyncio
+    async def test_checkpoint_loaded(self, tmp_path):
+        tm = FakeToolManager()
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+        runner = PlanRunner(context, enable_guards=False)
+
+        runner._save_checkpoint(
+            "test-plan-002",
+            completed_steps=["1"],
+            variables={"x": "y"},
+            status="running",
+        )
+
+        checkpoint = runner.load_checkpoint("test-plan-002")
+        assert checkpoint is not None
+        assert checkpoint["status"] == "running"
+        assert checkpoint["completed_steps"] == ["1"]
+
+    @pytest.mark.asyncio
+    async def test_checkpoint_not_found(self, tmp_path):
+        tm = FakeToolManager()
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+        runner = PlanRunner(context, enable_guards=False)
+
+        assert runner.load_checkpoint("nonexistent") is None
+
+    @pytest.mark.asyncio
+    async def test_failed_execution_checkpoints(self, tmp_path):
+        """Failed execution should save a checkpoint with 'failed' status."""
+        tm = FailingToolManager({"search_code"}, {"read_file": "data"})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+        runner = PlanRunner(context, enable_guards=False)
+
+        result = await runner.execute_plan(SAMPLE_PLAN, checkpoint=True)
+
+        assert not result.success
+        checkpoint_path = context.plans_dir / "test-plan-001_state.json"
+        assert checkpoint_path.exists()
+
+        data = json.loads(checkpoint_path.read_text())
+        assert data["status"] == "failed"
+        assert "1" in data["completed_steps"]
+
+
+# ── Tests: DAG Visualization ────────────────────────────────────────────────
+
+
+class TestRenderPlanDag:
+    """Test ASCII DAG rendering."""
+
+    def test_empty_plan(self):
+        result = render_plan_dag({"steps": []})
+        assert "empty plan" in result
+
+    def test_linear_plan(self):
+        dag = render_plan_dag(SAMPLE_PLAN)
+        assert "Read file" in dag
+        assert "Search code" in dag
+        assert "read_file" in dag
+        assert "search_code" in dag
+        assert "after: 1" in dag
+
+    def test_parallel_plan(self):
+        dag = render_plan_dag(PARALLEL_PLAN)
+        assert "Read file A" in dag
+        assert "Read file B" in dag
+        assert "Merge results" in dag
+        assert "after: 1, 2" in dag
+
+    def test_parallel_marker(self):
+        """Parallel steps should have ∥ marker."""
+        dag = render_plan_dag(PARALLEL_PLAN)
+        # Steps 1 and 2 are parallel — should have ∥ marker
+        assert "∥" in dag
+
+    def test_status_indicators(self):
+        plan = {
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Done step",
+                    "tool_calls": [{"name": "tool_a"}],
+                    "_status": "completed",
+                },
+                {
+                    "index": "2",
+                    "title": "Running step",
+                    "tool_calls": [{"name": "tool_b"}],
+                    "_status": "running",
+                    "depends_on": ["1"],
+                },
+                {
+                    "index": "3",
+                    "title": "Pending step",
+                    "tool_calls": [{"name": "tool_c"}],
+                    "_status": "pending",
+                    "depends_on": ["2"],
+                },
+                {
+                    "index": "4",
+                    "title": "Failed step",
+                    "tool_calls": [{"name": "tool_d"}],
+                    "_status": "failed",
+                },
+            ]
+        }
+        dag = render_plan_dag(plan)
+        assert "●" in dag  # completed
+        assert "◉" in dag  # running
+        assert "○" in dag  # pending
+        assert "✗" in dag  # failed
+
+    def test_tool_field_fallback(self):
+        """Handles steps with 'tool' field instead of 'tool_calls'."""
+        plan = {
+            "steps": [
+                {"index": "1", "title": "Step A", "tool": "my_tool"},
+            ]
+        }
+        dag = render_plan_dag(plan)
+        assert "my_tool" in dag
+
+
+# ── Tests: LLM-Driven Execution ───────────────────────────────────────────
+
+
+class TestLLMDrivenExecution:
+    """Test agentic LLM-driven step execution.
+
+    The agentic loop feeds ALL tool results (success and failure) back to the
+    LLM, letting it evaluate results and decide whether to retry, try
+    different parameters, or signal step completion with a text response.
+    """
+
+    def _make_model_manager(self, responses: list[dict]) -> MagicMock:
+        """Create a mock ModelManager with a sequence of LLM responses.
+
+        Each response can have 'tool_calls' (LLM wants to call a tool)
+        or just 'content' (LLM signals step complete).
+        """
+        client = AsyncMock()
+        client.create_completion = AsyncMock(side_effect=responses)
+
+        mm = MagicMock(spec=ModelManagerProtocol)
+        mm.get_client.return_value = client
+        return mm
+
+    def _tool_call_response(self, name: str, args: dict) -> dict:
+        """Build a mock LLM response containing a tool call.
+
+        Uses the chuk_llm native format (top-level tool_calls key).
+        """
+        return {
+            "response": None,
+            "tool_calls": [
+                {
+                    "id": "call_1",
+                    "type": "function",
+                    "function": {
+                        "name": name,
+                        "arguments": json.dumps(args),
+                    },
+                }
+            ],
+            "usage": {"prompt_tokens": 50, "completion_tokens": 15},
+        }
+
+    def _text_response(self, text: str = "Step complete.") -> dict:
+        """Build a mock LLM response with text only (no tool call).
+
+        Uses the chuk_llm native format (top-level response key).
+        """
+        return {
+            "response": text,
+            "tool_calls": None,
+            "usage": {"prompt_tokens": 100, "completion_tokens": 20},
+        }
+
+    @pytest.mark.asyncio
+    async def test_no_model_manager_uses_static(self, tmp_path):
+        """Without model_manager, falls back to static arg execution."""
+        tm = FakeToolManager(results={"read_file": "file content"})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+        runner = PlanRunner(context, enable_guards=False)
+
+        plan = {
+            "id": "test",
+            "title": "Test",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Read",
+                    "tool": "read_file",
+                    "args": {"path": "/tmp/x"},
+                },
+            ],
+        }
+        result = await runner.execute_plan(plan, checkpoint=False)
+
+        assert result.success
+        assert len(result.steps) == 1
+
+    @pytest.mark.asyncio
+    async def test_failure_without_llm_stops_plan(self, tmp_path):
+        """Without LLM, a failed step stops the plan."""
+        tm = FailingToolManager({"search_code"}, {"read_file": "data"})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+        runner = PlanRunner(context, enable_guards=False)
+
+        result = await runner.execute_plan(SAMPLE_PLAN, checkpoint=False)
+
+        assert not result.success
+
+    @pytest.mark.asyncio
+    async def test_agentic_loop_success_then_text(self, tmp_path):
+        """LLM calls tool, sees result, then responds with text = step done."""
+        tm = FakeToolManager(results={"geocode_location": {"lat": 52.0, "lon": 0.85}})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+
+        mm = self._make_model_manager(
+            [
+                # Turn 1: LLM calls the tool
+                self._tool_call_response("geocode_location", {"name": "Leavenheath"}),
+                # Turn 2: LLM sees the successful result, responds with text
+                self._text_response("Geocoded: lat=52.0, lon=0.85"),
+            ]
+        )
+
+        runner = PlanRunner(context, model_manager=mm, enable_guards=False)
+
+        plan = {
+            "id": "test",
+            "title": "Test",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Geocode location",
+                    "tool": "geocode_location",
+                    "args": {"name": "Leavenheath"},
"result_variable": "geo_result", + }, + ], + } + result = await runner.execute_plan(plan, checkpoint=False) + + assert result.success + assert result.steps[0].success + assert result.variables["geo_result"] == {"lat": 52.0, "lon": 0.85} + + @pytest.mark.asyncio + async def test_agentic_loop_retry_on_failure(self, tmp_path): + """LLM calls tool with wrong args, sees error, retries with correct args.""" + call_count = 0 + + async def smart_execute(tool_name, arguments, namespace=None, timeout=None): + nonlocal call_count + call_count += 1 + if arguments.get("latitude") and isinstance(arguments["latitude"], str): + return FakeToolCallResult( + tool_name=tool_name, + success=False, + error="expected number, got str", + ) + return FakeToolCallResult( + tool_name=tool_name, + result={"temperature": 15.5}, + ) + + tm = FakeToolManager() + tm.execute_tool = smart_execute + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + + mm = self._make_model_manager( + [ + # Turn 1: LLM passes string lat/lon + self._tool_call_response( + "get_weather", {"latitude": "52.0", "longitude": "0.85"} + ), + # Turn 2: LLM sees error, retries with numbers + self._tool_call_response( + "get_weather", {"latitude": 52.0, "longitude": 0.85} + ), + # Turn 3: LLM sees success, signals completion + self._text_response("Weather: 15.5°C"), + ] + ) + + runner = PlanRunner(context, model_manager=mm, enable_guards=False) + + plan = { + "id": "test", + "title": "Test", + "steps": [ + { + "index": "1", + "title": "Get weather", + "tool": "get_weather", + "args": {"latitude": 52.0, "longitude": 0.85}, + "result_variable": "weather", + }, + ], + } + result = await runner.execute_plan(plan, checkpoint=False) + + assert result.success + assert result.variables["weather"] == {"temperature": 15.5} + assert call_count == 2 # First call failed, second succeeded + + @pytest.mark.asyncio + async def test_agentic_loop_retry_on_empty_result(self, tmp_path): + """LLM sees null/empty result and retries with 
different params.""" + call_count = 0 + + async def retry_execute(tool_name, arguments, namespace=None, timeout=None): + nonlocal call_count + call_count += 1 + if arguments.get("name") == "Leavenheath, Suffolk": + return FakeToolCallResult(tool_name=tool_name, result={"results": None}) + # Simpler name works + return FakeToolCallResult( + tool_name=tool_name, result={"results": [{"lat": 52.0}]} + ) + + tm = FakeToolManager() + tm.execute_tool = retry_execute + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + + mm = self._make_model_manager( + [ + # Turn 1: LLM tries full name + self._tool_call_response("geocode", {"name": "Leavenheath, Suffolk"}), + # Turn 2: LLM sees null results, tries simpler name + self._tool_call_response("geocode", {"name": "Leavenheath"}), + # Turn 3: LLM sees good result, signals done + self._text_response("Found coordinates."), + ] + ) + + runner = PlanRunner(context, model_manager=mm, enable_guards=False) + + plan = { + "id": "test", + "title": "Test", + "steps": [ + { + "index": "1", + "title": "Geocode", + "tool": "geocode", + "args": {"name": "Leavenheath, Suffolk"}, + "result_variable": "geo", + }, + ], + } + result = await runner.execute_plan(plan, checkpoint=False) + + assert result.success + assert result.variables["geo"] == {"results": [{"lat": 52.0}]} + assert call_count == 2 + + @pytest.mark.asyncio + async def test_agentic_loop_max_turns_exhausted_with_result(self, tmp_path): + """When max turns exhausted but we have a result, return success.""" + tm = FakeToolManager(results={"tool_a": "data"}) + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + + # LLM keeps calling tools until turns run out (never sends text) + mm = self._make_model_manager( + [ + self._tool_call_response("tool_a", {"x": 1}), + self._tool_call_response("tool_a", {"x": 2}), + self._tool_call_response("tool_a", {"x": 3}), + ] + ) + + runner = PlanRunner( + context, model_manager=mm, enable_guards=False, max_step_retries=2 + ) + + plan 
= { + "id": "test", + "title": "Test", + "steps": [ + { + "index": "1", + "title": "Do thing", + "tool": "tool_a", + "args": {"x": 1}, + "result_variable": "out", + }, + ], + } + result = await runner.execute_plan(plan, checkpoint=False) + + # Should succeed because we got a result + assert result.success + assert result.variables["out"] == "data" + + @pytest.mark.asyncio + async def test_agentic_loop_max_turns_exhausted_no_result(self, tmp_path): + """When max turns exhausted with no success, return failure.""" + tm = FailingToolManager({"tool_a"}) + context = PlanningContext(tm, plans_dir=tmp_path / "plans") + + mm = self._make_model_manager( + [ + self._tool_call_response("tool_a", {"x": 1}), + self._tool_call_response("tool_a", {"x": 2}), + self._tool_call_response("tool_a", {"x": 3}), + ] + ) + + runner = PlanRunner( + context, model_manager=mm, enable_guards=False, max_step_retries=2 + ) + + plan = { + "id": "test", + "title": "Test", + "steps": [ + { + "index": "1", + "title": "Do thing", + "tool": "tool_a", + "args": {"x": 1}, + }, + ], + } + result = await runner.execute_plan(plan, checkpoint=False) + + assert not result.success + + @pytest.mark.asyncio + async def test_replanned_flag_on_result(self, tmp_path): + """PlanExecutionResult supports replanned flag.""" + result = PlanExecutionResult( + plan_id="test", + plan_title="Test", + success=True, + replanned=True, + ) + assert result.replanned is True + + @pytest.mark.asyncio + async def test_model_manager_protocol(self): + """ModelManagerProtocol is satisfied by objects with get_client().""" + mm = MagicMock() + mm.get_client = MagicMock(return_value=AsyncMock()) + assert isinstance(mm, ModelManagerProtocol) + + +# ── Tests: Serialize Variables ─────────────────────────────────────────────── + + +class TestSerializeVariables: + """Test _serialize_variables helper.""" + + def test_short_values_preserved(self): + result = _serialize_variables({"x": "hello", "n": 42}) + assert result == {"x": "hello", "n": 
42} + + def test_long_string_truncated(self): + long_str = "a" * 2000 + result = _serialize_variables({"data": long_str}) + assert result["data"].endswith("... [truncated]") + assert len(result["data"]) < 1100 + + def test_large_dict_summarized(self): + big_dict = {f"key_{i}": f"val_{i}" for i in range(200)} + result = _serialize_variables({"config": big_dict}) + assert "dict" in result["config"] + + def test_small_dict_preserved(self): + small_dict = {"a": 1, "b": 2} + result = _serialize_variables({"config": small_dict}) + assert result["config"] == {"a": 1, "b": 2} + + +# ── Tests: Summarize Variables ─────────────────────────────────────────────── + + +class TestSummarizeVariables: + """Test _summarize_variables helper.""" + + def test_empty_context(self): + assert _summarize_variables({}) == "none" + + def test_single_variable(self): + result = _summarize_variables({"geo": {"lat": 52.0}}) + assert "${geo}" in result + assert "52.0" in result + + def test_multiple_variables(self): + result = _summarize_variables({"a": 1, "b": "hello"}) + assert "${a}" in result + assert "${b}" in result + assert "1" in result + assert "hello" in result + + def test_long_value_truncated(self): + long_val = {"data": "x" * 1000} + result = _summarize_variables({"big": long_val}) + assert "..." 
in result + + +# ── Tests: Maybe Await ─────────────────────────────────────────────────────── + + +class TestMaybeAwait: + """Test _maybe_await helper for sync/async callback support.""" + + @pytest.mark.asyncio + async def test_sync_value_returned(self): + result = await _maybe_await(42) + assert result == 42 + + @pytest.mark.asyncio + async def test_none_returned(self): + result = await _maybe_await(None) + assert result is None + + @pytest.mark.asyncio + async def test_coroutine_awaited(self): + async def async_fn(): + return "async_result" + + result = await _maybe_await(async_fn()) + assert result == "async_result" + + @pytest.mark.asyncio + async def test_sync_callback_result(self): + def sync_callback(x): + return x * 2 + + result = await _maybe_await(sync_callback(5)) + assert result == 10 + + @pytest.mark.asyncio + async def test_async_callback_result(self): + async def async_callback(x): + return x * 2 + + result = await _maybe_await(async_callback(5)) + assert result == 10 + + +# ── Tests: Extract Tool Call ────────────────────────────────────────────────── + + +class TestExtractToolCall: + """Test _extract_tool_call with different response formats.""" + + def test_chuk_llm_native_format_with_tool_calls(self): + """chuk_llm returns {"response": null, "tool_calls": [...], "usage": {...}}.""" + response = { + "response": None, + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": { + "name": "geocode_location", + "arguments": '{"name": "London"}', + }, + } + ], + "usage": {"prompt_tokens": 53, "completion_tokens": 15}, + } + result = _extract_tool_call(response) + assert result is not None + assert result["name"] == "geocode_location" + assert result["args"] == {"name": "London"} + + def test_chuk_llm_native_format_text_response(self): + """chuk_llm text response: {"response": "text", "tool_calls": null}.""" + response = { + "response": "The temperature is 12.3 degrees.", + "tool_calls": None, + "usage": {"prompt_tokens": 100, 
"completion_tokens": 20}, + } + result = _extract_tool_call(response) + assert result is None + + def test_chuk_llm_native_format_empty_tool_calls(self): + """chuk_llm with empty tool_calls list.""" + response = { + "response": "Done", + "tool_calls": [], + "usage": {}, + } + result = _extract_tool_call(response) + assert result is None + + def test_openai_format_with_tool_calls(self): + """OpenAI-style: {"choices": [{"message": {"tool_calls": [...]}}]}.""" + response = { + "choices": [ + { + "message": { + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "get_weather", + "arguments": '{"lat": 52.0}', + }, + } + ] + } + } + ] + } + result = _extract_tool_call(response) + assert result is not None + assert result["name"] == "get_weather" + assert result["args"] == {"lat": 52.0} + + def test_openai_format_text_response(self): + """OpenAI-style text response: no tool_calls in message.""" + response = {"choices": [{"message": {"content": "Hello!"}}]} + result = _extract_tool_call(response) + assert result is None + + def test_none_response(self): + assert _extract_tool_call(None) is None + + def test_empty_dict(self): + assert _extract_tool_call({}) is None + + def test_dict_args_not_string(self): + """Arguments already parsed as dict (not JSON string).""" + response = { + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "test_tool", + "arguments": {"key": "value"}, + }, + } + ], + } + result = _extract_tool_call(response) + assert result is not None + assert result["args"] == {"key": "value"} + + def test_invalid_json_arguments(self): + """Malformed JSON in arguments defaults to empty dict.""" + response = { + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "test_tool", + "arguments": "not valid json{", + }, + } + ], + } + result = _extract_tool_call(response) + assert result is not None + assert result["name"] == "test_tool" + assert result["args"] == {} + 
+
+
+class TestParseToolCallEntry:
+    """Test _parse_tool_call_entry with dict and object formats."""
+
+    def test_dict_entry(self):
+        tc = {
+            "id": "call_1",
+            "type": "function",
+            "function": {"name": "my_tool", "arguments": '{"x": 1}'},
+        }
+        result = _parse_tool_call_entry(tc)
+        assert result == {"name": "my_tool", "args": {"x": 1}}
+
+    def test_object_entry(self):
+        class FuncObj:
+            name = "my_tool"
+            arguments = '{"x": 1}'
+
+        class TCObj:
+            function = FuncObj()
+
+        result = _parse_tool_call_entry(TCObj())
+        assert result == {"name": "my_tool", "args": {"x": 1}}
+
+    def test_no_function_returns_none(self):
+        result = _parse_tool_call_entry("not a tool call")
+        assert result is None
+
+    def test_empty_name_returns_none(self):
+        tc = {"function": {"name": "", "arguments": "{}"}}
+        result = _parse_tool_call_entry(tc)
+        assert result is None
+
+
+# ── Tests: Error Paths & Edge Cases ──────────────────────────────────────────
+
+
+class TestAgenticLoopErrorPaths:
+    """Error paths in the agentic LLM loop.
+
+    Covers: RuntimeError guard, LLM exceptions mid-loop, tool callbacks
+    with async/sync variants, and turn-0 text fallback.
+    """
+
+    def _make_model_manager(self, responses: list[dict]) -> MagicMock:
+        client = AsyncMock()
+        client.create_completion = AsyncMock(side_effect=responses)
+        mm = MagicMock(spec=ModelManagerProtocol)
+        mm.get_client.return_value = client
+        return mm
+
+    def _tool_call_response(self, name: str, args: dict) -> dict:
+        return {
+            "response": None,
+            "tool_calls": [
+                {
+                    "id": "call_1",
+                    "type": "function",
+                    "function": {
+                        "name": name,
+                        "arguments": json.dumps(args),
+                    },
+                }
+            ],
+            "usage": {"prompt_tokens": 50, "completion_tokens": 15},
+        }
+
+    def _text_response(self, text: str = "Step complete.") -> dict:
+        return {
+            "response": text,
+            "tool_calls": None,
+            "usage": {"prompt_tokens": 100, "completion_tokens": 20},
+        }
+
+    @pytest.mark.asyncio
+    async def test_missing_model_manager_raises_runtime_error(self, tmp_path):
+        """_execute_step_with_llm raises RuntimeError without model_manager."""
+        tm = FakeToolManager(results={"tool_a": "ok"})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+        runner = PlanRunner(context, enable_guards=False)  # No model_manager
+
+        with pytest.raises(RuntimeError, match="requires model_manager"):
+            await runner._execute_step_with_llm(
+                step={"index": "1", "title": "X", "tool": "tool_a", "args": {}},
+                var_context={},
+                step_index="1",
+                step_title="X",
+                hint_tool="tool_a",
+                hint_args={},
+            )
+
+    @pytest.mark.asyncio
+    async def test_llm_exception_mid_loop_retries(self, tmp_path):
+        """If the LLM raises an exception on one turn, the loop retries."""
+        tm = FakeToolManager(results={"tool_a": "result_data"})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+
+        client = AsyncMock()
+        client.create_completion = AsyncMock(
+            side_effect=[
+                # Turn 1: LLM raises an exception
+                RuntimeError("LLM connection failed"),
+                # Turn 2: LLM succeeds with a tool call
+                self._tool_call_response("tool_a", {"x": 1}),
+                # Turn 3: LLM signals done
+                self._text_response("Done"),
+            ]
+        )
+        mm = MagicMock(spec=ModelManagerProtocol)
+        mm.get_client.return_value = client
+
+        runner = PlanRunner(context, model_manager=mm, enable_guards=False)
+
+        plan = {
+            "id": "exc-test",
+            "title": "Exception Test",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Do thing",
+                    "tool": "tool_a",
+                    "args": {"x": 1},
+                    "result_variable": "out",
+                },
+            ],
+        }
+        result = await runner.execute_plan(plan, checkpoint=False)
+
+        assert result.success
+        assert result.variables["out"] == "result_data"
+
+    @pytest.mark.asyncio
+    async def test_turn_0_text_falls_back_to_static(self, tmp_path):
+        """If LLM responds with text on turn 0, fall back to static execution."""
+        tm = FakeToolManager(results={"tool_a": "static_result"})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+
+        mm = self._make_model_manager(
+            [
+                self._text_response("I don't need to call a tool"),
+            ]
+        )
+
+        runner = PlanRunner(context, model_manager=mm, enable_guards=False)
+
+        plan = {
+            "id": "fallback-test",
+            "title": "Fallback Test",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Do thing",
+                    "tool": "tool_a",
+                    "args": {"key": "val"},
+                    "result_variable": "out",
+                },
+            ],
+        }
+        result = await runner.execute_plan(plan, checkpoint=False)
+
+        assert result.success
+        assert result.variables["out"] == "static_result"
+        # Should have used static execution, so one tool call
+        assert len(tm.calls) == 1
+        assert tm.calls[0] == ("tool_a", {"key": "val"})
+
+    @pytest.mark.asyncio
+    async def test_tool_callbacks_fire_in_agentic_loop(self, tmp_path):
+        """on_tool_start / on_tool_complete fire for each tool call in loop."""
+        tm = FakeToolManager(results={"tool_a": "ok"})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+
+        mm = self._make_model_manager(
+            [
+                self._tool_call_response("tool_a", {"x": 1}),
+                self._tool_call_response("tool_a", {"x": 2}),
+                self._text_response("Done"),
+            ]
+        )
+
+        tool_starts: list[tuple[str, dict]] = []
+        tool_completes: list[tuple[str, bool]] = []
+
+        runner = PlanRunner(
+            context,
+            model_manager=mm,
+            enable_guards=False,
+            on_tool_start=lambda name, args: tool_starts.append((name, args)),
+            on_tool_complete=lambda name, result, ok, elapsed: tool_completes.append(
+                (name, ok)
+            ),
+        )
+
+        plan = {
+            "id": "cb-test",
+            "title": "Callback Test",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Do thing",
+                    "tool": "tool_a",
+                    "args": {"x": 1},
+                },
+            ],
+        }
+        result = await runner.execute_plan(plan, checkpoint=False)
+
+        assert result.success
+        assert len(tool_starts) == 2
+        assert tool_starts[0] == ("tool_a", {"x": 1})
+        assert tool_starts[1] == ("tool_a", {"x": 2})
+        assert len(tool_completes) == 2
+        assert all(ok for _, ok in tool_completes)
+
+    @pytest.mark.asyncio
+    async def test_async_tool_callbacks(self, tmp_path):
+        """Async tool callbacks are properly awaited."""
+        tm = FakeToolManager(results={"tool_a": "ok"})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+
+        mm = self._make_model_manager(
+            [
+                self._tool_call_response("tool_a", {"x": 1}),
+                self._text_response("Done"),
+            ]
+        )
+
+        started = []
+        completed = []
+
+        async def on_start(name, args):
+            started.append(name)
+
+        async def on_complete(name, result, ok, elapsed):
+            completed.append((name, ok))
+
+        runner = PlanRunner(
+            context,
+            model_manager=mm,
+            enable_guards=False,
+            on_tool_start=on_start,
+            on_tool_complete=on_complete,
+        )
+
+        plan = {
+            "id": "async-cb-test",
+            "title": "Async Callback Test",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Do thing",
+                    "tool": "tool_a",
+                    "args": {"x": 1},
+                },
+            ],
+        }
+        result = await runner.execute_plan(plan, checkpoint=False)
+
+        assert result.success
+        assert started == ["tool_a"]
+        assert completed == [("tool_a", True)]
+
+    @pytest.mark.asyncio
+    async def test_text_on_later_turn_without_result_fails(self, tmp_path):
+        """LLM text on turn > 0 but no prior success → step failure."""
+        tm = FailingToolManager({"tool_a"})
+        context = PlanningContext(tm, plans_dir=tmp_path / "plans")
+
+        mm = self._make_model_manager(
+            [
+                # Turn 1: tool call fails
+                self._tool_call_response("tool_a", {"x": 1}),
+                # Turn 2: LLM gives up with text (no good result stored)
+                self._text_response("I could not complete this step."),
+            ]
+        )
+
+        runner = PlanRunner(context, model_manager=mm, enable_guards=False)
+
+        plan = {
+            "id": "fail-test",
+            "title": "Fail Test",
+            "steps": [
+                {
+                    "index": "1",
+                    "title": "Do thing",
+                    "tool": "tool_a",
+                    "args": {"x": 1},
+                },
+            ],
+        }
+        result = await runner.execute_plan(plan, checkpoint=False)
+
+        assert not result.success
+        assert result.steps[0].error is not None
diff --git a/tests/planning/test_tools.py b/tests/planning/test_tools.py
new file mode 100644
index 00000000..0dcca3cf
--- /dev/null
+++ b/tests/planning/test_tools.py
@@ -0,0 +1,377 @@
+# tests/planning/test_tools.py
+"""Tests for planning/tools.py — plan tool definitions and handler."""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from typing import Any
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from mcp_cli.planning.tools import (
+    _PLAN_TOOL_NAMES,
+    _validate_step,
+    get_plan_tools_as_dicts,
+    handle_plan_tool,
+)
+
+
+# ── Helpers ──────────────────────────────────────────────────────────────────
+
+
+@dataclass
+class FakeToolInfo:
+    name: str
+
+
+class FakeToolManager:
+    """Minimal ToolManager stub."""
+
+    def __init__(self, tool_names: list[str] | None = None):
+        self._tool_names = tool_names or ["read_file", "write_file"]
+
+    async def get_all_tools(self) -> list[FakeToolInfo]:
+        return [FakeToolInfo(name=n) for n in self._tool_names]
+
+    async def get_adapted_tools_for_llm(self, provider: str) -> tuple[list[dict], dict]:
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": n,
+                    "description": f"Tool: {n}",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "path": {"type": "string", "description": "File path"}
+                        },
+                        "required": ["path"],
+                    },
+                },
+            }
+            for n in self._tool_names
+        ]
+        return tools, {}
+
+
+class FakePlanningContext:
+    """Minimal PlanningContext stub for tool tests."""
+
+    def __init__(self, tool_names: list[str] | None = None):
+        self.tool_manager = FakeToolManager(tool_names)
+        self._saved_plans: dict[str, dict] = {}
+
+    async def get_tool_catalog(self) -> list[dict[str, Any]]:
+        tools, _ = await self.tool_manager.get_adapted_tools_for_llm("openai")
+        return tools
+
+    async def save_plan_from_dict(self, plan_dict: dict[str, Any]) -> str:
+        plan_id = f"plan-{len(self._saved_plans) + 1}"
+        self._saved_plans[plan_id] = plan_dict
+        return plan_id
+
+    async def get_plan(self, plan_id: str) -> dict[str, Any] | None:
+        return self._saved_plans.get(plan_id)
+
+
+# ── Tests: tool definitions ──────────────────────────────────────────────────
+
+
+class TestGetPlanToolsAsDicts:
+    """Tests for get_plan_tools_as_dicts()."""
+
+    def test_returns_three_tools(self):
+        tools = get_plan_tools_as_dicts()
+        assert len(tools) == 3
+
+    def test_tool_names_match_frozenset(self):
+        tools = get_plan_tools_as_dicts()
+        names = {t["function"]["name"] for t in tools}
+        assert names == _PLAN_TOOL_NAMES
+
+    def test_all_have_function_type(self):
+        tools = get_plan_tools_as_dicts()
+        for tool in tools:
+            assert tool["type"] == "function"
+            assert "function" in tool
+            assert "name" in tool["function"]
+            assert "description" in tool["function"]
+            assert "parameters" in tool["function"]
+
+    def test_plan_create_requires_goal(self):
+        tools = get_plan_tools_as_dicts()
+        create = next(t for t in tools if t["function"]["name"] == "plan_create")
+        assert "goal" in create["function"]["parameters"]["properties"]
+        assert "goal" in create["function"]["parameters"]["required"]
+
+    def test_plan_execute_requires_plan_id(self):
+        tools = get_plan_tools_as_dicts()
+        execute = next(t for t in tools if t["function"]["name"] == "plan_execute")
+        assert "plan_id" in execute["function"]["parameters"]["properties"]
+        assert "plan_id" in execute["function"]["parameters"]["required"]
+
+    def test_plan_create_and_execute_requires_goal(self):
+        tools = get_plan_tools_as_dicts()
+        combined = next(
+            t for t in tools if t["function"]["name"] == "plan_create_and_execute"
+        )
+        assert "goal" in combined["function"]["parameters"]["properties"]
+        assert "goal" in combined["function"]["parameters"]["required"]
+
+
+class TestPlanToolNames:
+    """Tests for the _PLAN_TOOL_NAMES frozenset."""
+
+    def test_contains_expected_names(self):
+        assert "plan_create" in _PLAN_TOOL_NAMES
+        assert "plan_execute" in _PLAN_TOOL_NAMES
+        assert "plan_create_and_execute" in _PLAN_TOOL_NAMES
+
+    def test_is_frozenset(self):
+        assert isinstance(_PLAN_TOOL_NAMES, frozenset)
+
+    def test_has_exactly_three_entries(self):
+        assert len(_PLAN_TOOL_NAMES) == 3
+
+
+# ── Tests: _validate_step ────────────────────────────────────────────────────
+
+
+class TestValidateStep:
+    """Tests for the step validation function."""
+
+    def test_valid_step(self):
+        ok, msg = _validate_step(
+            {"tool": "read_file", "title": "Read"}, ["read_file", "write_file"]
+        )
+        assert ok is True
+        assert msg == ""
+
+    def test_unknown_tool(self):
+        ok, msg = _validate_step(
+            {"tool": "hack_server", "title": "Hack"}, ["read_file"]
+        )
+        assert ok is False
+        assert "Unknown tool" in msg
+
+    def test_missing_title(self):
+        ok, msg = _validate_step({"tool": "read_file", "title": ""}, ["read_file"])
+        assert ok is False
+        assert "title" in msg.lower()
+
+    def test_missing_tool_key(self):
+        ok, msg = _validate_step({"title": "Something"}, ["read_file"])
+        assert ok is False
+        assert "Unknown tool" in msg
+
+
+# ── Tests: handle_plan_tool ──────────────────────────────────────────────────
+
+
+class TestHandlePlanTool:
+    """Tests for the main handle_plan_tool dispatch function."""
+
+    @pytest.mark.asyncio
+    async def test_unknown_tool_returns_error(self):
+        ctx = FakePlanningContext()
+        result = await handle_plan_tool("plan_unknown", {}, ctx)
+        parsed = json.loads(result)
+        assert "error" in parsed
+        assert "Unknown plan tool" in parsed["error"]
+
+    @pytest.mark.asyncio
+    async def test_plan_create_missing_goal(self):
+        ctx = FakePlanningContext()
+        result = await handle_plan_tool("plan_create", {}, ctx)
+        parsed = json.loads(result)
+        assert "error" in parsed
+        assert "Goal" in parsed["error"] or "required" in parsed["error"].lower()
+
+    @pytest.mark.asyncio
+    async def test_plan_create_empty_goal(self):
+        ctx = FakePlanningContext()
+        result = await handle_plan_tool("plan_create", {"goal": ""}, ctx)
+        parsed = json.loads(result)
+        assert "error" in parsed
+
+    @pytest.mark.asyncio
+    async def test_plan_execute_missing_plan_id(self):
+        ctx = FakePlanningContext()
+        result = await handle_plan_tool("plan_execute", {}, ctx)
+        parsed = json.loads(result)
+        assert "error" in parsed
+        assert "plan_id" in parsed["error"]
+
+    @pytest.mark.asyncio
+    async def test_plan_execute_unknown_plan(self):
+        ctx = FakePlanningContext()
+        result = await handle_plan_tool("plan_execute", {"plan_id": "nonexistent"}, ctx)
+        parsed = json.loads(result)
+        assert "error" in parsed
+        assert "not found" in parsed["error"].lower()
+
+    @pytest.mark.asyncio
+    async def test_plan_create_and_execute_missing_goal(self):
+        ctx = FakePlanningContext()
+        result = await handle_plan_tool("plan_create_and_execute", {}, ctx)
+        parsed = json.loads(result)
+        assert "error" in parsed
+
+    @pytest.mark.asyncio
+    async def test_plan_create_success(self):
+        """Test plan_create with mocked PlanAgent."""
+        ctx = FakePlanningContext(["read_file", "write_file"])
+
+        fake_plan = {
+            "title": "Test Plan",
+            "steps": [
+                {"title": "Read", "tool": "read_file", "args": {"path": "/tmp/test"}},
+            ],
+        }
+
+        with patch("chuk_ai_planner.agents.plan_agent.PlanAgent") as MockAgent:
+            mock_agent = MockAgent.return_value
+            mock_agent.plan = AsyncMock(return_value=fake_plan)
+
+            result = await handle_plan_tool("plan_create", {"goal": "Read a file"}, ctx)
+
+            parsed =
json.loads(result) + assert parsed["success"] is True + assert "plan_id" in parsed + assert parsed["title"] == "Test Plan" + assert len(parsed["steps"]) == 1 + + @pytest.mark.asyncio + async def test_plan_create_agent_returns_empty(self): + """Test plan_create when PlanAgent returns no steps.""" + ctx = FakePlanningContext() + + with patch("chuk_ai_planner.agents.plan_agent.PlanAgent") as MockAgent: + mock_agent = MockAgent.return_value + mock_agent.plan = AsyncMock(return_value={"title": "Empty", "steps": []}) + + result = await handle_plan_tool("plan_create", {"goal": "Do nothing"}, ctx) + + parsed = json.loads(result) + assert "error" in parsed + assert "valid plan" in parsed["error"].lower() + + @pytest.mark.asyncio + async def test_plan_create_agent_exception(self): + """Test plan_create when PlanAgent raises.""" + ctx = FakePlanningContext() + + with patch("chuk_ai_planner.agents.plan_agent.PlanAgent") as MockAgent: + mock_agent = MockAgent.return_value + mock_agent.plan = AsyncMock(side_effect=RuntimeError("LLM is down")) + + result = await handle_plan_tool( + "plan_create", {"goal": "Break things"}, ctx + ) + + parsed = json.loads(result) + assert "error" in parsed + assert "failed" in parsed["error"].lower() + + @pytest.mark.asyncio + async def test_plan_create_and_execute_success(self): + """Test plan_create_and_execute with mocked PlanAgent + PlanRunner.""" + ctx = FakePlanningContext(["read_file"]) + + fake_plan = { + "title": "Read Plan", + "steps": [ + {"title": "Read", "tool": "read_file", "args": {"path": "/tmp/test"}}, + ], + } + + @dataclass + class FakeStepResult: + step_index: int = 1 + step_title: str = "Read" + tool_name: str = "read_file" + success: bool = True + error: str | None = None + + @dataclass + class FakeExecResult: + success: bool = True + plan_id: str = "plan-1" + plan_title: str = "Read Plan" + total_duration: float = 0.5 + steps: list = None + error: str | None = None + variables: dict = None + + def __post_init__(self): + if 
self.steps is None: + self.steps = [FakeStepResult()] + if self.variables is None: + self.variables = {"step_1_result": "file contents"} + + with ( + patch("chuk_ai_planner.agents.plan_agent.PlanAgent") as MockAgent, + patch("mcp_cli.planning.executor.PlanRunner") as MockRunner, + ): + mock_agent = MockAgent.return_value + mock_agent.plan = AsyncMock(return_value=fake_plan) + + mock_runner = MockRunner.return_value + mock_runner.execute_plan = AsyncMock(return_value=FakeExecResult()) + + result = await handle_plan_tool( + "plan_create_and_execute", {"goal": "Read a file"}, ctx + ) + + parsed = json.loads(result) + assert parsed["success"] is True + assert parsed["steps_completed"] == 1 + assert "results" in parsed + + @pytest.mark.asyncio + async def test_plan_execute_success(self): + """Test plan_execute with a saved plan and mocked PlanRunner.""" + ctx = FakePlanningContext(["read_file"]) + + # Pre-save a plan + plan_data = { + "title": "Saved Plan", + "steps": [{"title": "Read", "tool": "read_file", "args": {"path": "/tmp"}}], + } + plan_id = await ctx.save_plan_from_dict(plan_data) + + @dataclass + class FakeStepResult: + step_index: int = 1 + step_title: str = "Read" + tool_name: str = "read_file" + success: bool = True + error: str | None = None + + @dataclass + class FakeExecResult: + success: bool = True + plan_id: str = "plan-1" + plan_title: str = "Saved Plan" + total_duration: float = 0.3 + steps: list = None + error: str | None = None + variables: dict = None + + def __post_init__(self): + if self.steps is None: + self.steps = [FakeStepResult()] + if self.variables is None: + self.variables = {} + + with patch("mcp_cli.planning.executor.PlanRunner") as MockRunner: + mock_runner = MockRunner.return_value + mock_runner.execute_plan = AsyncMock(return_value=FakeExecResult()) + + result = await handle_plan_tool("plan_execute", {"plan_id": plan_id}, ctx) + + parsed = json.loads(result) + assert parsed["success"] is True + assert parsed["steps_total"] == 1 
diff --git a/uv.lock b/uv.lock index 15f539d0..22ba0935 100644 --- a/uv.lock +++ b/uv.lock @@ -10,6 +10,15 @@ resolution-markers = [ "python_full_version < '3.12' and sys_platform != 'darwin' and sys_platform != 'win32'", ] +[[package]] +name = "aiofiles" +version = "25.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354, upload-time = "2025-10-09T20:51:04.358Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload-time = "2025-10-09T20:51:03.174Z" }, +] + [[package]] name = "aiohappyeyeballs" version = "2.6.1" @@ -211,6 +220,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b9/fa/123043af240e49752f1c4bd24da5053b6bd00cad78c2be53c0d1e8b975bc/backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34", size = 30181, upload-time = "2024-05-28T17:01:53.112Z" }, ] +[[package]] +name = "beautifulsoup4" +version = "4.14.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = 
"sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" }, +] + +[[package]] +name = "bs4" +version = "0.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/aa/4acaf814ff901145da37332e05bb510452ebed97bc9602695059dd46ef39/bs4-0.0.2.tar.gz", hash = "sha256:a48685c58f50fe127722417bae83fe6badf500d54b55f7e39ffe43b798653925", size = 698, upload-time = "2024-01-17T18:15:47.371Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/bb/bf7aab772a159614954d84aa832c129624ba6c32faa559dfb200a534e50b/bs4-0.0.2-py2.py3-none-any.whl", hash = "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc", size = 1189, upload-time = "2024-01-17T18:15:48.613Z" }, +] + [[package]] name = "cachetools" version = "7.0.1" @@ -299,6 +333,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, ] +[[package]] +name = "chardet" +version = "6.0.0.post1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7f/42/fb9436c103a881a377e34b9f58d77b5f503461c702ff654ebe86151bcfe9/chardet-6.0.0.post1.tar.gz", hash = "sha256:6b78048c3c97c7b2ed1fbad7a18f76f5a6547f7d34dbab536cc13887c9a92fa4", size = 12521798, upload-time = "2026-02-22T15:09:17.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/66/42/5de54f632c2de53cd3415b3703383d5fff43a94cbc0567ef362515261a21/chardet-6.0.0.post1-py3-none-any.whl", hash = "sha256:c894a36800549adf7bb5f2af47033281b75fdfcd2aa0f0243be0ad22a52e2dcb", size = 627245, upload-time = "2026-02-22T15:09:15.876Z" }, +] + [[package]] name = 
"charset-normalizer" version = "3.4.4" @@ -372,6 +415,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, ] +[[package]] +name = "chuk-ai-planner" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asyncio" }, + { name = "bs4" }, + { name = "chuk-session-manager" }, + { name = "chuk-tool-processor" }, + { name = "geopy" }, + { name = "lxml-html-clean" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "readability-lxml" }, + { name = "requests" }, + { name = "uuid" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/b9/5cd6dd266eeddca056952682987b1317b1565c37c6abc286e656827e810f/chuk_ai_planner-0.5.1.tar.gz", hash = "sha256:c4c0ddf3b44275618b6ecebfd36d0bd98c510f1a734b1a302647672a3c754a52", size = 77819, upload-time = "2025-12-04T16:53:18.455Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/a8/bd068921d56a4c27014c444d0f766e9bcdfc6bee631a0f8aac05069b5573/chuk_ai_planner-0.5.1-py3-none-any.whl", hash = "sha256:cbc7f2c9380b2726824a047ac9a14901ae28dcde9a986f85acfcf40dc9748646", size = 87737, upload-time = "2025-12-04T16:53:17.117Z" }, +] + [[package]] name = "chuk-ai-session-manager" version = "0.11" @@ -443,6 +509,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/92/93/b316332cb2461fcb375394f0cf4ffd8ed489ddad5fe0bb4dc04a4b07414e/chuk_mcp_client_oauth-0.4-py3-none-any.whl", hash = "sha256:aa4fe95bdb232b268c063936edd5eae5d83531261b0947cc46db613dcc5c032d", size = 65743, upload-time = "2026-02-18T17:21:49.755Z" }, ] +[[package]] +name = "chuk-session-manager" +version = "0.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiofiles" }, + { 
name = "chuk-tool-processor" }, + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ff/e9/8973b0a539044846b815e5c545f9cac00fda82d2d7fd59ed001d5d1a25da/chuk_session_manager-0.1.0.tar.gz", hash = "sha256:9cb896642b36e0e7c366b3e13ba99cbb9e5356c778a6cd63ba0c10f719674f98", size = 36503, upload-time = "2025-05-10T18:02:40.798Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/b2/50e9b3b0919df9c8db40edddf44e17362457aa4ca263e9d81efce573b469/chuk_session_manager-0.1.0-py3-none-any.whl", hash = "sha256:270044c38cf11c29a89e62462159b574d391a1f11f9c1423a9c7208fdb94e23f", size = 38492, upload-time = "2025-05-10T18:02:38.89Z" }, +] + [[package]] name = "chuk-sessions" version = "0.6.1" @@ -679,6 +759,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/58/73ed07ef8b09052f7b7955c0ad7e3bc9f624c9e3b86dcf261aa0d2ad99a0/csrf-0.1b1-py3-none-any.whl", hash = "sha256:87d34ec729dd321e80e6dcbad90537df112089204fcf9e488a8bbd1301831841", size = 3925, upload-time = "2018-06-17T14:55:09.51Z" }, ] +[[package]] +name = "cssselect" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/2e/cdfd8b01c37cbf4f9482eefd455853a3cf9c995029a46acd31dfaa9c1dd6/cssselect-1.4.0.tar.gz", hash = "sha256:fdaf0a1425e17dfe8c5cf66191d211b357cf7872ae8afc4c6762ddd8ac47fc92", size = 40589, upload-time = "2026-01-29T07:00:26.701Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/0c/7bb51e3acfafd16c48875bf3db03607674df16f5b6ef8d056586af7e2b8b/cssselect-1.4.0-py3-none-any.whl", hash = "sha256:c0ec5c0191c8ee39fcc8afc1540331d8b55b0183478c50e9c8a79d44dbceb1d8", size = 18540, upload-time = "2026-01-29T07:00:24.994Z" }, +] + [[package]] name = "distro" version = "1.9.0" @@ -849,6 +938,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = 
"sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" }, ] +[[package]] +name = "geographiclib" +version = "2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/78/4892343230a9d29faa1364564e525307a37e54ad776ea62c12129dbba704/geographiclib-2.1.tar.gz", hash = "sha256:6a6545e6262d0ed3522e13c515713718797e37ed8c672c31ad7b249f372ef108", size = 37004, upload-time = "2025-08-21T21:34:26Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/b3/802576f2ea5dcb48501bb162e4c7b7b3ca5654a42b2c968ef98a797a4c79/geographiclib-2.1-py3-none-any.whl", hash = "sha256:e2a873b9b9e7fc38721ad73d5f4e6c9ed140d428a339970f505c07056997d40b", size = 40740, upload-time = "2025-08-21T21:34:24.955Z" }, +] + +[[package]] +name = "geopy" +version = "2.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "geographiclib" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/fd/ef6d53875ceab72c1fad22dbed5ec1ad04eb378c2251a6a8024bad890c3b/geopy-2.4.1.tar.gz", hash = "sha256:50283d8e7ad07d89be5cb027338c6365a32044df3ae2556ad3f52f4840b3d0d1", size = 117625, upload-time = "2023-11-23T21:49:32.734Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/15/cf2a69ade4b194aa524ac75112d5caac37414b20a3a03e6865dfe0bd1539/geopy-2.4.1-py3-none-any.whl", hash = "sha256:ae8b4bc5c1131820f4d75fce9d4aaaca0c85189b3aa5d64c3dcaf5e3b7b882a7", size = 125437, upload-time = "2023-11-23T21:49:30.421Z" }, +] + [[package]] name = "google-auth" version = "2.48.0" @@ -1318,6 +1428,125 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0f/b1/02eebed49c754b01b17de7705caa8c4ceecfb4f926cdafc220c863584360/lomond-0.3.3-py2.py3-none-any.whl", hash = "sha256:df1dd4dd7b802a12b71907ab1abb08b8ce9950195311207579379eb3b1553de7", size = 35512, upload-time = "2018-09-21T15:17:38.686Z" }, ] +[[package]] +name = "lxml" 
+version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/88/262177de60548e5a2bfc46ad28232c9e9cbde697bd94132aeb80364675cb/lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62", size = 4073426, upload-time = "2025-09-22T04:04:59.287Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/d5/becbe1e2569b474a23f0c672ead8a29ac50b2dc1d5b9de184831bda8d14c/lxml-6.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:13e35cbc684aadf05d8711a5d1b5857c92e5e580efa9a0d2be197199c8def607", size = 8634365, upload-time = "2025-09-22T04:00:45.672Z" }, + { url = "https://files.pythonhosted.org/packages/28/66/1ced58f12e804644426b85d0bb8a4478ca77bc1761455da310505f1a3526/lxml-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b1675e096e17c6fe9c0e8c81434f5736c0739ff9ac6123c87c2d452f48fc938", size = 4650793, upload-time = "2025-09-22T04:00:47.783Z" }, + { url = "https://files.pythonhosted.org/packages/11/84/549098ffea39dfd167e3f174b4ce983d0eed61f9d8d25b7bf2a57c3247fc/lxml-6.0.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac6e5811ae2870953390452e3476694196f98d447573234592d30488147404d", size = 4944362, upload-time = "2025-09-22T04:00:49.845Z" }, + { url = "https://files.pythonhosted.org/packages/ac/bd/f207f16abf9749d2037453d56b643a7471d8fde855a231a12d1e095c4f01/lxml-6.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5aa0fc67ae19d7a64c3fe725dc9a1bb11f80e01f78289d05c6f62545affec438", size = 5083152, upload-time = "2025-09-22T04:00:51.709Z" }, + { url = "https://files.pythonhosted.org/packages/15/ae/bd813e87d8941d52ad5b65071b1affb48da01c4ed3c9c99e40abb266fbff/lxml-6.0.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de496365750cc472b4e7902a485d3f152ecf57bd3ba03ddd5578ed8ceb4c5964", size = 5023539, upload-time = "2025-09-22T04:00:53.593Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/cd/9bfef16bd1d874fbe0cb51afb00329540f30a3283beb9f0780adbb7eec03/lxml-6.0.2-cp311-cp311-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:200069a593c5e40b8f6fc0d84d86d970ba43138c3e68619ffa234bc9bb806a4d", size = 5344853, upload-time = "2025-09-22T04:00:55.524Z" }, + { url = "https://files.pythonhosted.org/packages/b8/89/ea8f91594bc5dbb879734d35a6f2b0ad50605d7fb419de2b63d4211765cc/lxml-6.0.2-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d2de809c2ee3b888b59f995625385f74629707c9355e0ff856445cdcae682b7", size = 5225133, upload-time = "2025-09-22T04:00:57.269Z" }, + { url = "https://files.pythonhosted.org/packages/b9/37/9c735274f5dbec726b2db99b98a43950395ba3d4a1043083dba2ad814170/lxml-6.0.2-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:b2c3da8d93cf5db60e8858c17684c47d01fee6405e554fb55018dd85fc23b178", size = 4677944, upload-time = "2025-09-22T04:00:59.052Z" }, + { url = "https://files.pythonhosted.org/packages/20/28/7dfe1ba3475d8bfca3878365075abe002e05d40dfaaeb7ec01b4c587d533/lxml-6.0.2-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:442de7530296ef5e188373a1ea5789a46ce90c4847e597856570439621d9c553", size = 5284535, upload-time = "2025-09-22T04:01:01.335Z" }, + { url = "https://files.pythonhosted.org/packages/e7/cf/5f14bc0de763498fc29510e3532bf2b4b3a1c1d5d0dff2e900c16ba021ef/lxml-6.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2593c77efde7bfea7f6389f1ab249b15ed4aa5bc5cb5131faa3b843c429fbedb", size = 5067343, upload-time = "2025-09-22T04:01:03.13Z" }, + { url = "https://files.pythonhosted.org/packages/1c/b0/bb8275ab5472f32b28cfbbcc6db7c9d092482d3439ca279d8d6fa02f7025/lxml-6.0.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:3e3cb08855967a20f553ff32d147e14329b3ae70ced6edc2f282b94afbc74b2a", size = 4725419, upload-time = "2025-09-22T04:01:05.013Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/4c/7c222753bc72edca3b99dbadba1b064209bc8ed4ad448af990e60dcce462/lxml-6.0.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ed6c667fcbb8c19c6791bbf40b7268ef8ddf5a96940ba9404b9f9a304832f6c", size = 5275008, upload-time = "2025-09-22T04:01:07.327Z" }, + { url = "https://files.pythonhosted.org/packages/6c/8c/478a0dc6b6ed661451379447cdbec77c05741a75736d97e5b2b729687828/lxml-6.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b8f18914faec94132e5b91e69d76a5c1d7b0c73e2489ea8929c4aaa10b76bbf7", size = 5248906, upload-time = "2025-09-22T04:01:09.452Z" }, + { url = "https://files.pythonhosted.org/packages/2d/d9/5be3a6ab2784cdf9accb0703b65e1b64fcdd9311c9f007630c7db0cfcce1/lxml-6.0.2-cp311-cp311-win32.whl", hash = "sha256:6605c604e6daa9e0d7f0a2137bdc47a2e93b59c60a65466353e37f8272f47c46", size = 3610357, upload-time = "2025-09-22T04:01:11.102Z" }, + { url = "https://files.pythonhosted.org/packages/e2/7d/ca6fb13349b473d5732fb0ee3eec8f6c80fc0688e76b7d79c1008481bf1f/lxml-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e5867f2651016a3afd8dd2c8238baa66f1e2802f44bc17e236f547ace6647078", size = 4036583, upload-time = "2025-09-22T04:01:12.766Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a2/51363b5ecd3eab46563645f3a2c3836a2fc67d01a1b87c5017040f39f567/lxml-6.0.2-cp311-cp311-win_arm64.whl", hash = "sha256:4197fb2534ee05fd3e7afaab5d8bfd6c2e186f65ea7f9cd6a82809c887bd1285", size = 3680591, upload-time = "2025-09-22T04:01:14.874Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c8/8ff2bc6b920c84355146cd1ab7d181bc543b89241cfb1ebee824a7c81457/lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456", size = 8661887, upload-time = "2025-09-22T04:01:17.265Z" }, + { url = "https://files.pythonhosted.org/packages/37/6f/9aae1008083bb501ef63284220ce81638332f9ccbfa53765b2b7502203cf/lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924", size = 4667818, upload-time = "2025-09-22T04:01:19.688Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ca/31fb37f99f37f1536c133476674c10b577e409c0a624384147653e38baf2/lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f", size = 4950807, upload-time = "2025-09-22T04:01:21.487Z" }, + { url = "https://files.pythonhosted.org/packages/da/87/f6cb9442e4bada8aab5ae7e1046264f62fdbeaa6e3f6211b93f4c0dd97f1/lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534", size = 5109179, upload-time = "2025-09-22T04:01:23.32Z" }, + { url = "https://files.pythonhosted.org/packages/c8/20/a7760713e65888db79bbae4f6146a6ae5c04e4a204a3c48896c408cd6ed2/lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564", size = 5023044, upload-time = "2025-09-22T04:01:25.118Z" }, + { url = "https://files.pythonhosted.org/packages/a2/b0/7e64e0460fcb36471899f75831509098f3fd7cd02a3833ac517433cb4f8f/lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f", size = 5359685, upload-time = "2025-09-22T04:01:27.398Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e1/e5df362e9ca4e2f48ed6411bd4b3a0ae737cc842e96877f5bf9428055ab4/lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0", size = 5654127, upload-time = "2025-09-22T04:01:29.629Z" }, + { url = "https://files.pythonhosted.org/packages/c6/d1/232b3309a02d60f11e71857778bfcd4acbdb86c07db8260caf7d008b08f8/lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192", size = 5253958, upload-time = "2025-09-22T04:01:31.535Z" }, + { url = "https://files.pythonhosted.org/packages/35/35/d955a070994725c4f7d80583a96cab9c107c57a125b20bb5f708fe941011/lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0", size = 4711541, upload-time = "2025-09-22T04:01:33.801Z" }, + { url = "https://files.pythonhosted.org/packages/1e/be/667d17363b38a78c4bd63cfd4b4632029fd68d2c2dc81f25ce9eb5224dd5/lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092", size = 5267426, upload-time = "2025-09-22T04:01:35.639Z" }, + { url = "https://files.pythonhosted.org/packages/ea/47/62c70aa4a1c26569bc958c9ca86af2bb4e1f614e8c04fb2989833874f7ae/lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f", size = 5064917, upload-time = "2025-09-22T04:01:37.448Z" }, + { url = "https://files.pythonhosted.org/packages/bd/55/6ceddaca353ebd0f1908ef712c597f8570cc9c58130dbb89903198e441fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8", size = 4788795, upload-time = "2025-09-22T04:01:39.165Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e8/fd63e15da5e3fd4c2146f8bbb3c14e94ab850589beab88e547b2dbce22e1/lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f", size = 5676759, upload-time = "2025-09-22T04:01:41.506Z" }, + { url = "https://files.pythonhosted.org/packages/76/47/b3ec58dc5c374697f5ba37412cd2728f427d056315d124dd4b61da381877/lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6", size = 5255666, upload-time = 
"2025-09-22T04:01:43.363Z" }, + { url = "https://files.pythonhosted.org/packages/19/93/03ba725df4c3d72afd9596eef4a37a837ce8e4806010569bedfcd2cb68fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322", size = 5277989, upload-time = "2025-09-22T04:01:45.215Z" }, + { url = "https://files.pythonhosted.org/packages/c6/80/c06de80bfce881d0ad738576f243911fccf992687ae09fd80b734712b39c/lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849", size = 3611456, upload-time = "2025-09-22T04:01:48.243Z" }, + { url = "https://files.pythonhosted.org/packages/f7/d7/0cdfb6c3e30893463fb3d1e52bc5f5f99684a03c29a0b6b605cfae879cd5/lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f", size = 4011793, upload-time = "2025-09-22T04:01:50.042Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/93c73c67db235931527301ed3785f849c78991e2e34f3fd9a6663ffda4c5/lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6", size = 3672836, upload-time = "2025-09-22T04:01:52.145Z" }, + { url = "https://files.pythonhosted.org/packages/53/fd/4e8f0540608977aea078bf6d79f128e0e2c2bba8af1acf775c30baa70460/lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77", size = 8648494, upload-time = "2025-09-22T04:01:54.242Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f4/2a94a3d3dfd6c6b433501b8d470a1960a20ecce93245cf2db1706adf6c19/lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f", size = 4661146, upload-time = "2025-09-22T04:01:56.282Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/2e/4efa677fa6b322013035d38016f6ae859d06cac67437ca7dc708a6af7028/lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452", size = 4946932, upload-time = "2025-09-22T04:01:58.989Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0f/526e78a6d38d109fdbaa5049c62e1d32fdd70c75fb61c4eadf3045d3d124/lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048", size = 5100060, upload-time = "2025-09-22T04:02:00.812Z" }, + { url = "https://files.pythonhosted.org/packages/81/76/99de58d81fa702cc0ea7edae4f4640416c2062813a00ff24bd70ac1d9c9b/lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df", size = 5019000, upload-time = "2025-09-22T04:02:02.671Z" }, + { url = "https://files.pythonhosted.org/packages/b5/35/9e57d25482bc9a9882cb0037fdb9cc18f4b79d85df94fa9d2a89562f1d25/lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1", size = 5348496, upload-time = "2025-09-22T04:02:04.904Z" }, + { url = "https://files.pythonhosted.org/packages/a6/8e/cb99bd0b83ccc3e8f0f528e9aa1f7a9965dfec08c617070c5db8d63a87ce/lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916", size = 5643779, upload-time = "2025-09-22T04:02:06.689Z" }, + { url = "https://files.pythonhosted.org/packages/d0/34/9e591954939276bb679b73773836c6684c22e56d05980e31d52a9a8deb18/lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd", size = 5244072, upload-time = "2025-09-22T04:02:08.587Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/27/b29ff065f9aaca443ee377aff699714fcbffb371b4fce5ac4ca759e436d5/lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6", size = 4718675, upload-time = "2025-09-22T04:02:10.783Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/f756f9c2cd27caa1a6ef8c32ae47aadea697f5c2c6d07b0dae133c244fbe/lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a", size = 5255171, upload-time = "2025-09-22T04:02:12.631Z" }, + { url = "https://files.pythonhosted.org/packages/61/46/bb85ea42d2cb1bd8395484fd72f38e3389611aa496ac7772da9205bbda0e/lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679", size = 5057175, upload-time = "2025-09-22T04:02:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/95/0c/443fc476dcc8e41577f0af70458c50fe299a97bb6b7505bb1ae09aa7f9ac/lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659", size = 4785688, upload-time = "2025-09-22T04:02:16.957Z" }, + { url = "https://files.pythonhosted.org/packages/48/78/6ef0b359d45bb9697bc5a626e1992fa5d27aa3f8004b137b2314793b50a0/lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484", size = 5660655, upload-time = "2025-09-22T04:02:18.815Z" }, + { url = "https://files.pythonhosted.org/packages/ff/ea/e1d33808f386bc1339d08c0dcada6e4712d4ed8e93fcad5f057070b7988a/lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2", size = 5247695, upload-time = "2025-09-22T04:02:20.593Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/47/eba75dfd8183673725255247a603b4ad606f4ae657b60c6c145b381697da/lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314", size = 5269841, upload-time = "2025-09-22T04:02:22.489Z" }, + { url = "https://files.pythonhosted.org/packages/76/04/5c5e2b8577bc936e219becb2e98cdb1aca14a4921a12995b9d0c523502ae/lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2", size = 3610700, upload-time = "2025-09-22T04:02:24.465Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0a/4643ccc6bb8b143e9f9640aa54e38255f9d3b45feb2cbe7ae2ca47e8782e/lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7", size = 4010347, upload-time = "2025-09-22T04:02:26.286Z" }, + { url = "https://files.pythonhosted.org/packages/31/ef/dcf1d29c3f530577f61e5fe2f1bd72929acf779953668a8a47a479ae6f26/lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf", size = 3671248, upload-time = "2025-09-22T04:02:27.918Z" }, + { url = "https://files.pythonhosted.org/packages/03/15/d4a377b385ab693ce97b472fe0c77c2b16ec79590e688b3ccc71fba19884/lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe", size = 8659801, upload-time = "2025-09-22T04:02:30.113Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e8/c128e37589463668794d503afaeb003987373c5f94d667124ffd8078bbd9/lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d", size = 4659403, upload-time = "2025-09-22T04:02:32.119Z" }, + { url = 
"https://files.pythonhosted.org/packages/00/ce/74903904339decdf7da7847bb5741fc98a5451b42fc419a86c0c13d26fe2/lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d", size = 4966974, upload-time = "2025-09-22T04:02:34.155Z" }, + { url = "https://files.pythonhosted.org/packages/1f/d3/131dec79ce61c5567fecf82515bd9bc36395df42501b50f7f7f3bd065df0/lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5", size = 5102953, upload-time = "2025-09-22T04:02:36.054Z" }, + { url = "https://files.pythonhosted.org/packages/3a/ea/a43ba9bb750d4ffdd885f2cd333572f5bb900cd2408b67fdda07e85978a0/lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0", size = 5055054, upload-time = "2025-09-22T04:02:38.154Z" }, + { url = "https://files.pythonhosted.org/packages/60/23/6885b451636ae286c34628f70a7ed1fcc759f8d9ad382d132e1c8d3d9bfd/lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba", size = 5352421, upload-time = "2025-09-22T04:02:40.413Z" }, + { url = "https://files.pythonhosted.org/packages/48/5b/fc2ddfc94ddbe3eebb8e9af6e3fd65e2feba4967f6a4e9683875c394c2d8/lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0", size = 5673684, upload-time = "2025-09-22T04:02:42.288Z" }, + { url = "https://files.pythonhosted.org/packages/29/9c/47293c58cc91769130fbf85531280e8cc7868f7fbb6d92f4670071b9cb3e/lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d", size = 5252463, upload-time = "2025-09-22T04:02:44.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/da/ba6eceb830c762b48e711ded880d7e3e89fc6c7323e587c36540b6b23c6b/lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37", size = 4698437, upload-time = "2025-09-22T04:02:46.524Z" }, + { url = "https://files.pythonhosted.org/packages/a5/24/7be3f82cb7990b89118d944b619e53c656c97dc89c28cfb143fdb7cd6f4d/lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9", size = 5269890, upload-time = "2025-09-22T04:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/1b/bd/dcfb9ea1e16c665efd7538fc5d5c34071276ce9220e234217682e7d2c4a5/lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917", size = 5097185, upload-time = "2025-09-22T04:02:50.746Z" }, + { url = "https://files.pythonhosted.org/packages/21/04/a60b0ff9314736316f28316b694bccbbabe100f8483ad83852d77fc7468e/lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f", size = 4745895, upload-time = "2025-09-22T04:02:52.968Z" }, + { url = "https://files.pythonhosted.org/packages/d6/bd/7d54bd1846e5a310d9c715921c5faa71cf5c0853372adf78aee70c8d7aa2/lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8", size = 5695246, upload-time = "2025-09-22T04:02:54.798Z" }, + { url = "https://files.pythonhosted.org/packages/fd/32/5643d6ab947bc371da21323acb2a6e603cedbe71cb4c99c8254289ab6f4e/lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a", size = 5260797, upload-time = "2025-09-22T04:02:57.058Z" }, + { url = 
"https://files.pythonhosted.org/packages/33/da/34c1ec4cff1eea7d0b4cd44af8411806ed943141804ac9c5d565302afb78/lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c", size = 5277404, upload-time = "2025-09-22T04:02:58.966Z" }, + { url = "https://files.pythonhosted.org/packages/82/57/4eca3e31e54dc89e2c3507e1cd411074a17565fa5ffc437c4ae0a00d439e/lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b", size = 3670072, upload-time = "2025-09-22T04:03:38.05Z" }, + { url = "https://files.pythonhosted.org/packages/e3/e0/c96cf13eccd20c9421ba910304dae0f619724dcf1702864fd59dd386404d/lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed", size = 4080617, upload-time = "2025-09-22T04:03:39.835Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5d/b3f03e22b3d38d6f188ef044900a9b29b2fe0aebb94625ce9fe244011d34/lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8", size = 3754930, upload-time = "2025-09-22T04:03:41.565Z" }, + { url = "https://files.pythonhosted.org/packages/5e/5c/42c2c4c03554580708fc738d13414801f340c04c3eff90d8d2d227145275/lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d", size = 8910380, upload-time = "2025-09-22T04:03:01.645Z" }, + { url = "https://files.pythonhosted.org/packages/bf/4f/12df843e3e10d18d468a7557058f8d3733e8b6e12401f30b1ef29360740f/lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba", size = 4775632, upload-time = "2025-09-22T04:03:03.814Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/0c/9dc31e6c2d0d418483cbcb469d1f5a582a1cd00a1f4081953d44051f3c50/lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601", size = 4975171, upload-time = "2025-09-22T04:03:05.651Z" }, + { url = "https://files.pythonhosted.org/packages/e7/2b/9b870c6ca24c841bdd887504808f0417aa9d8d564114689266f19ddf29c8/lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed", size = 5110109, upload-time = "2025-09-22T04:03:07.452Z" }, + { url = "https://files.pythonhosted.org/packages/bf/0c/4f5f2a4dd319a178912751564471355d9019e220c20d7db3fb8307ed8582/lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37", size = 5041061, upload-time = "2025-09-22T04:03:09.297Z" }, + { url = "https://files.pythonhosted.org/packages/12/64/554eed290365267671fe001a20d72d14f468ae4e6acef1e179b039436967/lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338", size = 5306233, upload-time = "2025-09-22T04:03:11.651Z" }, + { url = "https://files.pythonhosted.org/packages/7a/31/1d748aa275e71802ad9722df32a7a35034246b42c0ecdd8235412c3396ef/lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9", size = 5604739, upload-time = "2025-09-22T04:03:13.592Z" }, + { url = "https://files.pythonhosted.org/packages/8f/41/2c11916bcac09ed561adccacceaedd2bf0e0b25b297ea92aab99fd03d0fa/lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd", size = 5225119, upload-time = "2025-09-22T04:03:15.408Z" }, + { 
url = "https://files.pythonhosted.org/packages/99/05/4e5c2873d8f17aa018e6afde417c80cc5d0c33be4854cce3ef5670c49367/lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d", size = 4633665, upload-time = "2025-09-22T04:03:17.262Z" }, + { url = "https://files.pythonhosted.org/packages/0f/c9/dcc2da1bebd6275cdc723b515f93edf548b82f36a5458cca3578bc899332/lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9", size = 5234997, upload-time = "2025-09-22T04:03:19.14Z" }, + { url = "https://files.pythonhosted.org/packages/9c/e2/5172e4e7468afca64a37b81dba152fc5d90e30f9c83c7c3213d6a02a5ce4/lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e", size = 5090957, upload-time = "2025-09-22T04:03:21.436Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b3/15461fd3e5cd4ddcb7938b87fc20b14ab113b92312fc97afe65cd7c85de1/lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d", size = 4764372, upload-time = "2025-09-22T04:03:23.27Z" }, + { url = "https://files.pythonhosted.org/packages/05/33/f310b987c8bf9e61c4dd8e8035c416bd3230098f5e3cfa69fc4232de7059/lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec", size = 5634653, upload-time = "2025-09-22T04:03:25.767Z" }, + { url = "https://files.pythonhosted.org/packages/70/ff/51c80e75e0bc9382158133bdcf4e339b5886c6ee2418b5199b3f1a61ed6d/lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272", size = 5233795, upload-time = "2025-09-22T04:03:27.62Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/4d/4856e897df0d588789dd844dbed9d91782c4ef0b327f96ce53c807e13128/lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f", size = 5257023, upload-time = "2025-09-22T04:03:30.056Z" }, + { url = "https://files.pythonhosted.org/packages/0f/85/86766dfebfa87bea0ab78e9ff7a4b4b45225df4b4d3b8cc3c03c5cd68464/lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312", size = 3911420, upload-time = "2025-09-22T04:03:32.198Z" }, + { url = "https://files.pythonhosted.org/packages/fe/1a/b248b355834c8e32614650b8008c69ffeb0ceb149c793961dd8c0b991bb3/lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca", size = 4406837, upload-time = "2025-09-22T04:03:34.027Z" }, + { url = "https://files.pythonhosted.org/packages/92/aa/df863bcc39c5e0946263454aba394de8a9084dbaff8ad143846b0d844739/lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c", size = 3822205, upload-time = "2025-09-22T04:03:36.249Z" }, + { url = "https://files.pythonhosted.org/packages/0b/11/29d08bc103a62c0eba8016e7ed5aeebbf1e4312e83b0b1648dd203b0e87d/lxml-6.0.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1c06035eafa8404b5cf475bb37a9f6088b0aca288d4ccc9d69389750d5543700", size = 3949829, upload-time = "2025-09-22T04:04:45.608Z" }, + { url = "https://files.pythonhosted.org/packages/12/b3/52ab9a3b31e5ab8238da241baa19eec44d2ab426532441ee607165aebb52/lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c7d13103045de1bdd6fe5d61802565f1a3537d70cd3abf596aa0af62761921ee", size = 4226277, upload-time = "2025-09-22T04:04:47.754Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/33/1eaf780c1baad88224611df13b1c2a9dfa460b526cacfe769103ff50d845/lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a3c150a95fbe5ac91de323aa756219ef9cf7fde5a3f00e2281e30f33fa5fa4f", size = 4330433, upload-time = "2025-09-22T04:04:49.907Z" }, + { url = "https://files.pythonhosted.org/packages/7a/c1/27428a2ff348e994ab4f8777d3a0ad510b6b92d37718e5887d2da99952a2/lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60fa43be34f78bebb27812ed90f1925ec99560b0fa1decdb7d12b84d857d31e9", size = 4272119, upload-time = "2025-09-22T04:04:51.801Z" }, + { url = "https://files.pythonhosted.org/packages/f0/d0/3020fa12bcec4ab62f97aab026d57c2f0cfd480a558758d9ca233bb6a79d/lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21c73b476d3cfe836be731225ec3421fa2f048d84f6df6a8e70433dff1376d5a", size = 4417314, upload-time = "2025-09-22T04:04:55.024Z" }, + { url = "https://files.pythonhosted.org/packages/6c/77/d7f491cbc05303ac6801651aabeb262d43f319288c1ea96c66b1d2692ff3/lxml-6.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e", size = 3518768, upload-time = "2025-09-22T04:04:57.097Z" }, +] + +[package.optional-dependencies] +html-clean = [ + { name = "lxml-html-clean" }, +] + +[[package]] +name = "lxml-html-clean" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d9/cb/c9c5bb2a9c47292e236a808dd233a03531f53b626f36259dcd32b49c76da/lxml_html_clean-0.4.3.tar.gz", hash = "sha256:c9df91925b00f836c807beab127aac82575110eacff54d0a75187914f1bd9d8c", size = 21498, upload-time = "2025-10-02T20:49:24.895Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/10/4a/63a9540e3ca73709f4200564a737d63a4c8c9c4dd032bab8535f507c190a/lxml_html_clean-0.4.3-py3-none-any.whl", hash = "sha256:63fd7b0b9c3a2e4176611c2ca5d61c4c07ffca2de76c14059a81a2825833731e", size = 14177, upload-time = "2025-10-02T20:49:23.749Z" }, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -1410,6 +1639,7 @@ version = "0.16" source = { editable = "." } dependencies = [ { name = "asyncio" }, + { name = "chuk-ai-planner" }, { name = "chuk-ai-session-manager" }, { name = "chuk-llm" }, { name = "chuk-mcp-client-oauth" }, @@ -1453,6 +1683,7 @@ dev = [ requires-dist = [ { name = "asyncio", specifier = ">=3.4.3" }, { name = "asyncio", marker = "extra == 'dev'", specifier = ">=3.4.3" }, + { name = "chuk-ai-planner", specifier = ">=0.2" }, { name = "chuk-ai-session-manager", specifier = ">=0.11" }, { name = "chuk-llm", specifier = ">=0.17.1" }, { name = "chuk-mcp-client-oauth", specifier = ">=0.3.5" }, @@ -2415,6 +2646,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "readability-lxml" +version = "0.8.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "chardet" }, + { name = "cssselect" }, + { name = "lxml", extra = ["html-clean"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/3e/dc87d97532ddad58af786ec89c7036182e352574c1cba37bf2bf783d2b15/readability_lxml-0.8.4.1.tar.gz", hash = "sha256:9d2924f5942dd7f37fb4da353263b22a3e877ccf922d0e45e348e4177b035a53", size = 22874, upload-time = "2025-05-03T21:11:45.493Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c7/75/2cc58965097e351415af420be81c4665cf80da52a17ef43c01ffbe2caf91/readability_lxml-0.8.4.1-py3-none-any.whl", hash = "sha256:874c0cea22c3bf2b78c7f8df831bfaad3c0a89b7301d45a188db581652b4b465", size = 19912, upload-time = "2025-05-03T21:11:43.993Z" }, +] + [[package]] name = "referencing" version = "0.37.0" @@ -2768,6 +3013,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] +name = "soupsieve" +version = "2.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, +] + [[package]] name = "tabulate" version = "0.9.0"