diff --git a/action.yml b/action.yml
index 6d59925..aa7b0b5 100644
--- a/action.yml
+++ b/action.yml
@@ -37,9 +37,13 @@ inputs:
     required: false
     default: 'false'
   fail-on:
-    description: 'Comma-separated statuses that fail CI (REGRESSION, TOOLS_CHANGED, OUTPUT_CHANGED)'
+    description: 'Comma-separated statuses that fail CI (REGRESSION, TOOLS_CHANGED, OUTPUT_CHANGED, CONTRACT_DRIFT)'
     required: false
     default: 'REGRESSION'
+  contracts:
+    description: 'Check MCP contracts for interface drift before running tests'
+    required: false
+    default: 'false'
   generate-report:
     description: 'Generate HTML report'
     required: false
@@ -102,7 +106,17 @@ runs:
 
         # Regression detection mode
         if [ "${{ inputs.diff }}" = "true" ]; then
-          CMD="$CMD --diff --fail-on ${{ inputs.fail-on }}"
+          CMD="$CMD --diff"
+        fi
+
+        # MCP contract drift detection
+        if [ "${{ inputs.contracts }}" = "true" ]; then
+          CMD="$CMD --contracts"
+        fi
+
+        # Set fail-on (applies to both diff and contract statuses)
+        if [ "${{ inputs.diff }}" = "true" ] || [ "${{ inputs.contracts }}" = "true" ]; then
+          CMD="$CMD --fail-on ${{ inputs.fail-on }}"
         fi
 
         if [ "${{ inputs.generate-report }}" = "true" ]; then
diff --git a/docs/MCP_CONTRACTS.md b/docs/MCP_CONTRACTS.md
new file mode 100644
index 0000000..dc58f1a
--- /dev/null
+++ b/docs/MCP_CONTRACTS.md
@@ -0,0 +1,228 @@
+# MCP Contract Testing
+
+Detect when external MCP servers change their interface before your agent breaks.
+
+## The Problem
+
+When you use MCP servers you don't own (Scenario 2), the server can change its
+tool definitions at any time: rename parameters, remove tools, add required fields.
+Your agent tests pass today and fail tomorrow — not because your code changed,
+but because the server did.
+
+## The Solution
+
+MCP contract testing captures a snapshot of a server's tool definitions and diffs
+against it on every CI run. If the interface changed, you know immediately — before
+running your full test suite.
+
+This mirrors EvalView's golden baseline system:
+- **Golden traces** detect when your agent's *behavior* drifts
+- **MCP contracts** detect when an external server's *interface* drifts
+
+## Quick Start
+
+### 1. Snapshot a server
+
+```bash
+evalview mcp snapshot "npx:@modelcontextprotocol/server-github" --name server-github
+```
+
+Output:
+```
+Snapshot saved: .evalview/contracts/server-github.contract.json
+Tools discovered: 8
+  - create_issue
+  - list_issues
+  - create_pull_request
+  - ...
+```
+
+### 2. Check for drift
+
+```bash
+evalview mcp check server-github
+```
+
+If the server changed:
+```
+CONTRACT_DRIFT - 2 breaking change(s)
+  REMOVED: create_pull_request - tool 'create_pull_request' no longer available
+  CHANGED: list_issues - new required parameter 'owner'
+```
+
+### 3. Use in CI
+
+```bash
+evalview run tests/ --contracts --fail-on "REGRESSION,CONTRACT_DRIFT"
+```
+
+The `--contracts` flag checks all saved contracts *before* running tests.
+If any contract drifted and `--fail-on` includes `CONTRACT_DRIFT`, the run aborts
+immediately — no point testing against a broken interface.
+
+## CLI Reference
+
+### `evalview mcp snapshot`
+
+Capture tool definitions from an MCP server.
+
+```bash
+evalview mcp snapshot <endpoint> --name <server-name> [--notes "..."] [--timeout 30]
+```
+
+| Argument | Description |
+|----------|-------------|
+| `endpoint` | MCP server endpoint (e.g., `npx:@modelcontextprotocol/server-github`) |
+| `--name` | Human-readable identifier for this contract (required) |
+| `--notes` | Optional notes about this snapshot |
+| `--timeout` | Connection timeout in seconds (default: 30) |
+
+Supports all MCP transport types:
+- **stdio**: `"npx:@modelcontextprotocol/server-filesystem /tmp"`
+- **HTTP**: `"http://localhost:8080"`
+- **Command**: `"stdio:python my_server.py"`
+
+### `evalview mcp check`
+
+Compare current server interface against a saved contract.
+
+```bash
+evalview mcp check <name> [--endpoint <override>] [--timeout 30]
+```
+
+| Argument | Description |
+|----------|-------------|
+| `name` | Contract name (from `--name` in snapshot) |
+| `--endpoint` | Override endpoint (default: use endpoint from snapshot) |
+
+Exit codes:
+- `0` — No breaking changes
+- `1` — Breaking changes detected (CONTRACT_DRIFT)
+- `2` — Could not connect to server
+
+### `evalview mcp list`
+
+List all saved contracts.
+
+```bash
+evalview mcp list
+```
+
+### `evalview mcp show`
+
+Show full details of a contract including all tool schemas.
+
+```bash
+evalview mcp show <name>
+```
+
+### `evalview mcp delete`
+
+Remove a contract.
+
+```bash
+evalview mcp delete <name> [--force]
+```
+
+## Integration with `evalview run`
+
+The `--contracts` flag adds a pre-flight check to any test run:
+
+```bash
+evalview run tests/ --contracts
+```
+
+This checks every contract in `.evalview/contracts/` before running tests (a server that
+cannot be reached only produces a warning). Add `--fail-on CONTRACT_DRIFT` to fail CI on drift:
+
+```bash
+evalview run tests/ --contracts --fail-on "REGRESSION,CONTRACT_DRIFT"
+```
+
+Or use `--strict` (now includes CONTRACT_DRIFT):
+
+```bash
+evalview run tests/ --contracts --strict
+```
+
+## GitHub Actions
+
+```yaml
+- name: Run EvalView
+  uses: hidai25/eval-view@v0.2.1
+  with:
+    diff: true
+    contracts: true
+    fail-on: 'REGRESSION,CONTRACT_DRIFT'
+```
+
+## What Gets Detected
+
+### Breaking changes (trigger CONTRACT_DRIFT)
+
+| Change | Example |
+|--------|---------|
+| Tool removed | `create_pull_request` no longer exists |
+| Required parameter added | New required param `owner` on `list_issues` |
+| Parameter removed | `repo` param no longer accepted |
+| Parameter type changed | `limit` changed from `string` to `integer` |
+| Parameter became required | `owner` was optional, now required |
+
+### Informational changes (logged, don't fail)
+
+| Change | Example |
+|--------|---------|
+| New tool added | `merge_pull_request` now available |
+| Optional parameter added | New optional param `labels` on `create_issue` |
+| Description changed | Tool description updated |
+
+## Contract File Format
+
+Contracts are stored as JSON in `.evalview/contracts/`:
+
+```json
+{
+  "metadata": {
+    "server_name": "server-github",
+    "endpoint": "npx:@modelcontextprotocol/server-github",
+    "snapshot_at": "2026-02-07T10:30:00",
+    "protocol_version": "2024-11-05",
+    "tool_count": 8,
+    "schema_hash": "a1b2c3d4e5f67890"
+  },
+  "tools": [
+    {
+      "name": "create_issue",
+      "description": "Create a new issue in a GitHub repository",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "repo": { "type": "string" },
+          "title": { "type": "string" },
+          "body": { "type": "string" }
+        },
+        "required": ["repo", "title"]
+      }
+    }
+  ]
+}
+```
+
+Commit these files to your repo so CI can use them.
+
+## Best Practices
+
+1. **Snapshot after verifying** — Run your tests first, confirm everything works,
+   then snapshot. The contract represents a known-good interface.
+
+2. **Refresh periodically** — If a contract is >30 days old, `evalview mcp check`
+   will warn you. Re-snapshot to accept intentional changes.
+
+3. **One contract per server** — Name contracts after the server, not the tools.
+   `server-github` not `create-issue-tool`.
+
+4. **Commit contracts** — Store `.evalview/contracts/` in git. They're small JSON
+   files and CI needs them.
+
+5. **Check before testing** — Use `--contracts` on `evalview run` so drift is
+   caught before wasting time on tests that will fail anyway.
diff --git a/evalview/adapters/mcp_adapter.py b/evalview/adapters/mcp_adapter.py
index 6f49865..6a68f4b 100644
--- a/evalview/adapters/mcp_adapter.py
+++ b/evalview/adapters/mcp_adapter.py
@@ -449,6 +449,107 @@ def _create_error_trace(
             trace_context=tracer.build_trace_context(),
         )
 
+    async def discover_tools(self) -> List[Dict[str, Any]]:
+        """Fetch the tool definitions an MCP server currently advertises.
+
+        The endpoint is parsed into a transport and a target; "stdio" targets
+        are handled by the stdio helper, every other transport by the HTTP one.
+
+        Returns:
+            The tool definition dicts produced by the transport helper.
+
+        Raises:
+            Exception: Whatever the selected transport helper raises.
+        """
+        kind, destination = self._parse_endpoint()
+
+        # Anything that is not stdio is treated as an HTTP endpoint.
+        if kind != "stdio":
+            return await self._discover_tools_http(destination)
+        return await self._discover_tools_stdio(destination)
+
+    async def _discover_tools_stdio(self, command: str) -> List[Dict[str, Any]]:
+        """Spawn the server command and list its tools over stdio.
+
+        Sends initialize, the notifications/initialized notification and
+        tools/list, then stops the child process whether or not that worked.
+        """
+        import shlex
+
+        process = await asyncio.create_subprocess_exec(
+            *shlex.split(command),  # argv list, no shell involved
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,  # NOTE(review): not read in this method
+        )
+        try:
+            init_params = {
+                "protocolVersion": "2024-11-05",
+                "capabilities": {},
+                "clientInfo": {"name": "evalview", "version": "0.1.7"},
+            }
+            await self._send_request(process, "initialize", init_params)
+            await self._send_notification(process, "notifications/initialized", {})
+            result = await self._send_request(process, "tools/list", {})
+            return result.get("tools", [])
+        finally:
+            # terminate() can raise ProcessLookupError once the server has already exited.
+            if process.returncode is None:
+                process.terminate()
+            await process.wait()
+
+    async def _discover_tools_http(self, url: str) -> List[Dict[str, Any]]:
+        """List a server's tools with three JSON-RPC POSTs to ``url``.
+
+        The sequence is initialize, the notifications/initialized notification,
+        then tools/list; both id-bearing requests bump ``self._request_id`` first.
+
+        Returns:
+            The "tools" list of the tools/list result, or [] when it is absent.
+
+        Raises:
+            Exception: If the initialize or tools/list reply has an "error" member.
+        """
+        import httpx
+
+        async with httpx.AsyncClient(timeout=self.timeout) as http:
+            # 1) Open the session.
+            self._request_id += 1
+            handshake = {
+                "jsonrpc": "2.0",
+                "id": self._request_id,
+                "method": "initialize",
+                "params": {
+                    "protocolVersion": "2024-11-05",
+                    "capabilities": {},
+                    "clientInfo": {"name": "evalview", "version": "0.1.7"},
+                },
+            }
+            init_body = (await http.post(url, json=handshake)).json()
+            if "error" in init_body:
+                raise Exception(f"MCP initialize error: {init_body['error']}")
+
+            # 2) Notification: carries no "id" and its reply is not inspected.
+            ready = {
+                "jsonrpc": "2.0",
+                "method": "notifications/initialized",
+                "params": {},
+            }
+            await http.post(url, json=ready)
+
+            # 3) Ask for the tool list under a fresh request id.
+            self._request_id += 1
+            listing = {
+                "jsonrpc": "2.0",
+                "id": self._request_id,
+                "method": "tools/list",
+                "params": {},
+            }
+            list_body = (await http.post(url, json=listing)).json()
+            if "error" in list_body:
+                raise Exception(f"MCP tools/list error: {list_body['error']}")
+            return list_body.get("result", {}).get("tools", [])
+
     async def health_check(self) -> bool:
         """Check if MCP server is reachable."""
         transport, target = self._parse_endpoint()
diff --git a/evalview/cli.py b/evalview/cli.py
index c74e777..6797921 100644
--- a/evalview/cli.py
+++ b/evalview/cli.py
@@ -1360,7 +1360,7 @@ async def _init_wizard_async(dir: str):
     "--fail-on",
     type=str,
     default=None,
-    help="Comma-separated diff statuses that cause exit code 1 (default: REGRESSION, or from ci.fail_on in config.yaml)",
+    help="Comma-separated statuses that cause exit code 1: REGRESSION, TOOLS_CHANGED, OUTPUT_CHANGED, CONTRACT_DRIFT (default: REGRESSION)",
 )
 @click.option(
     "--warn-on",
@@ -1401,6 +1401,11 @@ async def _init_wizard_async(dir: str):
     default=None,
     help="Filter tests by difficulty level.",
 )
+@click.option(
+    "--contracts",
+    is_flag=True,
+    help="Check MCP contracts for interface drift before running tests. Fails fast if external servers changed.",
+)
 def run(
     path: Optional[str],
     pattern: str,
@@ -1432,6 +1437,7 @@ def run(
     runs: Optional[int],
     pass_rate: float,
     difficulty: Optional[str],
+    contracts: bool,
 ):
     """Run test cases against the agent.
 
@@ -1448,7 +1454,7 @@ def run(
 
     # Handle --strict flag (overrides config and CLI)
     if strict:
-        fail_on = "REGRESSION,TOOLS_CHANGED,OUTPUT_CHANGED"
+        fail_on = "REGRESSION,TOOLS_CHANGED,OUTPUT_CHANGED,CONTRACT_DRIFT"
         warn_on = ""
 
     asyncio.run(_run_async(
@@ -1456,7 +1462,8 @@ def run(
         sequential, max_workers, max_retries, retry_delay, watch, html_report, summary, coverage,
         adapter_override=adapter, diff=diff, diff_report=diff_report,
         fail_on=fail_on, warn_on=warn_on, trace=trace, trace_out=trace_out,
-        runs=runs, pass_rate=pass_rate, difficulty_filter=difficulty
+        runs=runs, pass_rate=pass_rate, difficulty_filter=difficulty,
+        contracts=contracts,
     ))
 
 
@@ -1488,6 +1495,7 @@ async def _run_async(
     runs: Optional[int] = None,
     pass_rate: float = 0.8,
     difficulty_filter: Optional[str] = None,
+    contracts: bool = False,
 ):
     """Async implementation of run command."""
     import fnmatch
@@ -1655,6 +1663,52 @@ async def _run_async(
         else:
             warn_on = str(config_warn_on)
 
+    # --- MCP Contract Check (runs before tests, fail fast) ---
+    # Placed after fail_on/warn_on resolution so config.yaml defaults are available.
+    contract_drifts = []
+    if contracts:
+        from evalview.core.mcp_contract import ContractStore
+        from evalview.core.contract_diff import diff_contract, ContractDriftStatus
+        from evalview.adapters.mcp_adapter import MCPAdapter as MCPContractAdapter
+
+        contract_store = ContractStore()
+        all_contracts = contract_store.list_contracts()
+
+        if all_contracts:
+            console.print("[cyan]━━━ MCP Contract Check ━━━[/cyan]\n")
+
+            for meta in all_contracts:
+                contract = contract_store.load_contract(meta.server_name)
+                if not contract:
+                    continue
+
+                mcp_adapter = MCPContractAdapter(endpoint=contract.metadata.endpoint, timeout=30.0)
+                try:
+                    current_tools = await mcp_adapter.discover_tools()
+                except Exception as e:
+                    console.print(f"  [yellow]WARN: {meta.server_name}[/yellow] - could not connect: {e}")
+                    continue
+
+                contract_result = diff_contract(contract, current_tools)
+
+                if contract_result.status == ContractDriftStatus.CONTRACT_DRIFT:
+                    contract_drifts.append(contract_result)
+                    console.print(f"  [red]CONTRACT_DRIFT: {meta.server_name}[/red] - {contract_result.summary()}")
+                    for change in contract_result.breaking_changes:
+                        console.print(f"    [red]{change.kind.value}: {change.tool_name}[/red] - {change.detail}")
+                else:
+                    console.print(f"  [green]PASSED: {meta.server_name}[/green]")
+
+            console.print()
+
+            # Fail fast if contract drift detected and fail_on includes it
+            if contract_drifts and "CONTRACT_DRIFT" in fail_on.upper():
+                console.print("[bold red]Aborting: MCP contract drift detected. Fix contracts before running tests.[/bold red]")
+                console.print("[dim]Accept changes: evalview mcp snapshot <endpoint> --name <name>[/dim]\n")
+                raise SystemExit(1)
+        else:
+            console.print("[dim]--contracts: No contracts found. Create one: evalview mcp snapshot <endpoint> --name <name>[/dim]\n")
+
     # Extract model config (can be string or dict)
     model_config = config.get("model", {})
     if verbose and model_config:
@@ -2807,7 +2861,7 @@ async def update_display():
         # Parse fail_on and warn_on into sets
         fail_statuses = set()
         warn_statuses = set()
-        valid_statuses = {"REGRESSION", "TOOLS_CHANGED", "OUTPUT_CHANGED", "PASSED"}
+        valid_statuses = {"REGRESSION", "TOOLS_CHANGED", "OUTPUT_CHANGED", "PASSED", "CONTRACT_DRIFT"}
 
         for s in fail_on.upper().split(","):
             s = s.strip()
@@ -5976,6 +6030,275 @@ def golden_show(test_name: str):
     console.print()
 
 
+# ============================================================================
+# MCP Contract Commands
+# ============================================================================
+
+
+@main.group()
+def mcp():
+    """Manage MCP contracts (detect external server interface drift).
+
+    MCP contracts are snapshots of an external MCP server's tool definitions.
+    Use them with `evalview run --contracts` to detect when servers change
+    their interface before your tests break.
+
+    Examples:
+        evalview mcp snapshot "npx:@modelcontextprotocol/server-github" --name server-github
+        evalview mcp check server-github
+        evalview mcp list
+    """
+    # No group-level setup: the docstring is the help text, subcommands do the work.
+
+
+@mcp.command("snapshot")
+@click.argument("endpoint")
+@click.option("--name", "-n", required=True, help="Server name (used as contract identifier)")
+@click.option("--notes", help="Notes about this snapshot")
+@click.option("--timeout", default=30.0, type=float, help="Connection timeout in seconds")
+def mcp_snapshot(endpoint: str, name: str, notes: Optional[str], timeout: float):
+    """Snapshot an MCP server's tool definitions as a contract.
+
+    ENDPOINT is the MCP server endpoint (e.g., "npx:@modelcontextprotocol/server-github").
+
+    Examples:
+        evalview mcp snapshot "npx:@modelcontextprotocol/server-filesystem /tmp" --name fs-server
+        evalview mcp snapshot "http://localhost:8080" --name my-server --notes "v2.1 release"
+    """
+    import asyncio
+    from evalview.adapters.mcp_adapter import MCPAdapter
+    from evalview.core.mcp_contract import ContractStore
+
+    console.print(f"\n[cyan]━━━ MCP Contract Snapshot ━━━[/cyan]\n")
+    console.print(f"  Server: [bold]{name}[/bold]")
+    console.print(f"  Endpoint: {endpoint}")
+    console.print()
+
+    adapter = MCPAdapter(endpoint=endpoint, timeout=timeout)
+    # Connection errors and empty tool lists exit 1 before anything is written.
+    try:
+        tools = asyncio.run(adapter.discover_tools())
+    except Exception as e:
+        console.print(f"[red]Failed to connect to MCP server: {e}[/red]")
+        console.print("[dim]Check that the server is running and the endpoint is correct.[/dim]\n")
+        raise SystemExit(1)
+
+    if not tools:
+        console.print("[yellow]Server returned no tools.[/yellow]\n")
+        raise SystemExit(1)
+
+    store = ContractStore()
+    # Never overwrite an existing contract without explicit confirmation.
+    if store.has_contract(name):
+        if not click.confirm(
+            f"Contract '{name}' already exists. Overwrite?",
+            default=False,
+        ):
+            console.print("[dim]Cancelled[/dim]\n")
+            return
+
+    path = store.save_contract(
+        server_name=name,
+        endpoint=endpoint,
+        tools=tools,
+        notes=notes,
+    )
+
+    console.print(f"[green]Snapshot saved: {path}[/green]")
+    console.print(f"  Tools discovered: [bold]{len(tools)}[/bold]")
+    for tool in tools:
+        desc = tool.get("description") or ""  # description may be missing or null
+        if len(desc) > 60:
+            desc = desc[:57] + "..."
+        console.print(f"    [dim]- {tool['name']}[/dim]  {desc}")
+    console.print()
+    console.print("[dim]Check for drift: evalview mcp check " + name + "[/dim]")
+    console.print("[dim]Use in CI: evalview run --contracts --fail-on CONTRACT_DRIFT[/dim]\n")
+
+
+@mcp.command("check")
+@click.argument("name")
+@click.option("--endpoint", help="Override endpoint (default: use endpoint from snapshot)")
+@click.option("--timeout", default=30.0, type=float, help="Connection timeout in seconds")
+def mcp_check(name: str, endpoint: Optional[str], timeout: float):
+    """Check an MCP server for contract drift.
+
+    NAME is the contract name (from `evalview mcp snapshot --name`).
+
+    Examples:
+        evalview mcp check server-github
+        evalview mcp check my-server --endpoint "http://new-host:8080"
+    """
+    import asyncio
+    from evalview.adapters.mcp_adapter import MCPAdapter
+    from evalview.core.mcp_contract import ContractStore
+    from evalview.core.contract_diff import diff_contract, ContractDriftStatus
+
+    store = ContractStore()
+    contract = store.load_contract(name)
+
+    if not contract:
+        console.print(f"\n[red]No contract found: {name}[/red]")
+        console.print("[dim]Create one with: evalview mcp snapshot <endpoint> --name " + name + "[/dim]\n")
+        raise SystemExit(1)
+
+    target_endpoint = endpoint or contract.metadata.endpoint  # --endpoint wins over the stored one
+    adapter = MCPAdapter(endpoint=target_endpoint, timeout=timeout)
+
+    console.print(f"\n[cyan]━━━ MCP Contract Check ━━━[/cyan]\n")
+    console.print(f"  Contract: [bold]{name}[/bold]")
+    console.print(f"  Endpoint: {target_endpoint}")
+
+    # Show snapshot age
+    age = datetime.now() - contract.metadata.snapshot_at
+    age_days = age.days
+    if age_days > 30:
+        console.print(f"  Snapshot age: [yellow]{age_days} days (consider refreshing)[/yellow]")
+    else:
+        console.print(f"  Snapshot age: [dim]{age_days} day(s)[/dim]")
+    console.print()
+
+    try:
+        current_tools = asyncio.run(adapter.discover_tools())
+    except Exception as e:
+        console.print(f"[red]Failed to connect to MCP server: {e}[/red]")
+        console.print("[dim]The server may be down. Use --endpoint to try a different host.[/dim]\n")
+        raise SystemExit(2)  # exit 2: server could not be queried
+
+    result = diff_contract(contract, current_tools)
+
+    if result.status == ContractDriftStatus.PASSED:
+        if result.informational_changes:
+            console.print(f"[green]PASSED[/green] - No breaking changes ({result.summary()})")
+            console.print()
+            for change in result.informational_changes:
+                console.print(f"  [dim]+ {change.tool_name}: {change.detail}[/dim]")
+        else:
+            console.print("[green]PASSED[/green] - Interface matches snapshot exactly")
+        console.print()
+    else:
+        console.print(f"[red]CONTRACT_DRIFT[/red] - {result.summary()}\n")
+
+        for change in result.breaking_changes:
+            if change.kind.value == "removed":
+                console.print(f"  [red]REMOVED: {change.tool_name}[/red] - {change.detail}")
+            else:
+                console.print(f"  [red]CHANGED: {change.tool_name}[/red] - {change.detail}")
+
+        if result.informational_changes:
+            console.print()
+            for change in result.informational_changes:
+                console.print(f"  [dim]INFO: {change.tool_name} - {change.detail}[/dim]")
+
+        console.print()
+        console.print("[dim]To accept the new interface: evalview mcp snapshot \"" + target_endpoint + "\" --name " + name + "[/dim]\n")
+        raise SystemExit(1)  # exit 1: breaking drift (endpoint is quoted above: it may contain spaces)
+
+
+@mcp.command("list")
+def mcp_list():
+    """List all MCP contract snapshots.
+
+    Shows all saved contracts with metadata.
+    """
+    from evalview.core.mcp_contract import ContractStore
+
+    saved = ContractStore().list_contracts()
+
+    # Empty store: point at the snapshot command and stop.
+    if not saved:
+        console.print("\n[yellow]No MCP contracts found.[/yellow]")
+        console.print("[dim]Create one: evalview mcp snapshot <endpoint> --name <name>[/dim]\n")
+        return
+
+    console.print("\n[cyan]━━━ MCP Contracts ━━━[/cyan]\n")
+
+    for entry in sorted(saved, key=lambda meta: meta.server_name):
+        days_old = (datetime.now() - entry.snapshot_at).days
+        when = "today" if days_old <= 0 else f"{days_old}d ago"
+
+        console.print(f"  [bold]{entry.server_name}[/bold]")
+        console.print(f"    [dim]Endpoint: {entry.endpoint}[/dim]")
+        console.print(f"    [dim]Tools: {entry.tool_count} | Snapshot: {when}[/dim]")
+        if entry.notes:
+            console.print(f"    [dim]Notes: {entry.notes}[/dim]")
+        console.print()
+
+    console.print(f"[dim]Total: {len(saved)} contract(s)[/dim]")
+    console.print("[dim]Check for drift: evalview mcp check <name>[/dim]\n")
+
+
+@mcp.command("delete")
+@click.argument("name")
+@click.option("--force", "-f", is_flag=True, help="Skip confirmation")
+def mcp_delete(name: str, force: bool):
+    """Delete an MCP contract snapshot.
+
+    NAME is the contract name to delete.
+    """
+    from evalview.core.mcp_contract import ContractStore
+
+    store = ContractStore()
+    if not store.has_contract(name):
+        console.print(f"\n[yellow]No contract found: {name}[/yellow]\n")
+        return
+
+    # --force short-circuits the prompt, so it is only shown without the flag.
+    confirmed = force or click.confirm(f"Delete contract '{name}'?", default=False)
+    if not confirmed:
+        console.print("[dim]Cancelled[/dim]")
+        return
+
+    store.delete_contract(name)
+    console.print(f"\n[green]Deleted contract: {name}[/green]\n")
+
+
+@mcp.command("show")
+@click.argument("name")
+def mcp_show(name: str):
+    """Show details of an MCP contract snapshot.
+
+    NAME is the contract name.
+    """
+    from evalview.core.mcp_contract import ContractStore
+
+    store = ContractStore()
+    contract = store.load_contract(name)
+
+    if not contract:
+        console.print(f"\n[yellow]No contract found: {name}[/yellow]")
+        console.print("[dim]Create one: evalview mcp snapshot <endpoint> --name " + name + "[/dim]\n")
+        return
+
+    meta = contract.metadata
+    age = datetime.now() - meta.snapshot_at
+
+    console.print(f"\n[cyan]━━━ MCP Contract: {meta.server_name} ━━━[/cyan]\n")
+    console.print(f"  Endpoint: {meta.endpoint}")
+    console.print(f"  Snapshot: {meta.snapshot_at.strftime('%Y-%m-%d %H:%M')} ({age.days}d ago)")
+    console.print(f"  Protocol: {meta.protocol_version}")
+    console.print(f"  Schema hash: {meta.schema_hash}")
+    if meta.notes:
+        console.print(f"  Notes: {meta.notes}")
+    console.print()
+
+    console.print(f"[bold]Tools ({meta.tool_count}):[/bold]\n")
+
+    for tool in contract.tools:
+        console.print(f"  [bold]{tool.name}[/bold]")
+        if tool.description:
+            console.print(f"    {tool.description}")
+        if tool.inputSchema.get("properties"):
+            props = tool.inputSchema["properties"]
+            required = set(tool.inputSchema.get("required", []))
+            # Required parameters get a red asterisk after their type.
+            for pname, pdef in props.items():
+                ptype = pdef.get("type", "any")
+                req_marker = " [red]*[/red]" if pname in required else ""
+                console.print(f"    [dim]- {pname}: {ptype}{req_marker}[/dim]")
+        console.print()
+
+
 @main.command()
 @click.option(
     "--provider",
diff --git a/evalview/core/contract_diff.py b/evalview/core/contract_diff.py
new file mode 100644
index 0000000..f43dc49
--- /dev/null
+++ b/evalview/core/contract_diff.py
@@ -0,0 +1,246 @@
+"""Contract diff engine for detecting MCP server interface drift.
+
+Compares a saved contract snapshot against the current tool definitions
+from an MCP server. Detects:
+  - REMOVED tools (breaking)
+  - ADDED tools (informational)
+  - CHANGED schemas: new required params, removed/renamed params, type changes (breaking)
+  - Optional param additions and description changes (informational, never drift)
+
+This is the mirror of diff.py: diff.py compares agent behavior traces,
+contract_diff.py compares server interface schemas.
+"""
+
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import List, Dict, Any, Set
+import logging
+
+from evalview.core.mcp_contract import MCPContract, ToolSchema
+
+logger = logging.getLogger(__name__)
+
+
+class ContractDriftStatus(Enum):
+    """Result of comparing current tools against a contract snapshot.
+
+    Two states:
+    - PASSED: No breaking changes (informational changes may still be present).
+    - CONTRACT_DRIFT: At least one breaking change (tools removed, schemas changed).
+    """
+
+    PASSED = "passed"
+    CONTRACT_DRIFT = "contract_drift"
+
+
+class ChangeKind(Enum):
+    """Kind of change detected for a tool or one of its parameters."""
+
+    REMOVED = "removed"           # Tool no longer exists (breaking)
+    ADDED = "added"               # New tool available (informational)
+    PARAM_ADDED_REQ = "param_added_required"  # New or newly required param (breaking)
+    PARAM_REMOVED = "param_removed"           # Param removed (breaking)
+    PARAM_TYPE_CHANGED = "param_type_changed"  # Param "type" value changed (breaking)
+    PARAM_ADDED_OPT = "param_added_optional"  # New optional param (informational)
+    DESCRIPTION_CHANGED = "description_changed"  # Tool description changed (informational)
+
+
+# Kinds that count as breaking drift; any other kind is informational (see ToolChange.is_breaking)
+BREAKING_CHANGES: Set[ChangeKind] = {
+    ChangeKind.REMOVED,
+    ChangeKind.PARAM_ADDED_REQ,
+    ChangeKind.PARAM_REMOVED,
+    ChangeKind.PARAM_TYPE_CHANGED,
+}
+
+
+@dataclass
+class ToolChange:
+    """A single change detected in a tool's schema."""
+
+    tool_name: str  # name of the tool the change applies to
+    kind: ChangeKind  # category; decides whether the change is breaking
+    detail: str  # human-readable description of the change
+
+    @property
+    def is_breaking(self) -> bool:
+        return self.kind in BREAKING_CHANGES  # membership in the module-level set
+
+
+@dataclass
+class ContractDiff:
+    """All changes found between one contract snapshot and the live tool list."""
+
+    server_name: str  # contract/server the comparison belongs to
+    changes: List[ToolChange] = field(default_factory=list)  # breaking + informational
+    snapshot_tool_count: int = 0  # tools recorded in the snapshot
+    current_tool_count: int = 0  # tools reported by the server now
+
+    @property
+    def has_breaking_changes(self) -> bool:
+        """True when at least one recorded change is breaking."""
+        return any(change.is_breaking for change in self.changes)
+
+    @property
+    def status(self) -> ContractDriftStatus:
+        # Any breaking change means drift; informational ones never do.
+        drifted = self.has_breaking_changes
+        return ContractDriftStatus.CONTRACT_DRIFT if drifted else ContractDriftStatus.PASSED
+
+    @property
+    def breaking_changes(self) -> List[ToolChange]:
+        return [change for change in self.changes if change.is_breaking]
+
+    @property
+    def informational_changes(self) -> List[ToolChange]:
+        return [change for change in self.changes if not change.is_breaking]
+
+    def summary(self) -> str:
+        """One-line count of breaking and informational changes."""
+        if not self.changes:
+            return "No changes"
+
+        counted = (
+            (len(self.breaking_changes), "breaking"),
+            (len(self.informational_changes), "informational"),
+        )
+        # Zero counts are left out of the text entirely.
+        return ", ".join(f"{count} {label} change(s)" for count, label in counted if count)
+
+
+def _get_required_params(schema: Dict[str, Any]) -> Set[str]:
+    """Return the schema's "required" names as a set (empty if missing or null)."""
+    return set(schema.get("required") or [])
+
+
+def _get_all_params(schema: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
+    """Return the schema's "properties" mapping ({} if missing or null)."""
+    return schema.get("properties") or {}
+
+
+def _diff_tool_schema(
+    tool_name: str,
+    snapshot: ToolSchema,
+    current: ToolSchema,
+) -> List[ToolChange]:
+    """Compare two versions of the same tool's schema.
+
+    Reports description, parameter add/remove, "type" and newly-required changes.
+    Parameter names are visited in sorted order so reports are reproducible.
+    """
+    changes: List[ToolChange] = []
+
+    # Description change (informational); a missing description is shown as ''.
+    if snapshot.description != current.description:
+        old, new = (snapshot.description or "")[:60], (current.description or "")[:60]
+        changes.append(ToolChange(
+            tool_name=tool_name,
+            kind=ChangeKind.DESCRIPTION_CHANGED,
+            detail=f"description changed from '{old}' to '{new}'",
+        ))
+
+    snap_schema = snapshot.inputSchema
+    curr_schema = current.inputSchema
+    snap_params = _get_all_params(snap_schema)
+    curr_params = _get_all_params(curr_schema)
+    snap_required = _get_required_params(snap_schema)
+    curr_required = _get_required_params(curr_schema)
+    snap_names = set(snap_params.keys())
+    curr_names = set(curr_params.keys())
+
+    # Removed parameters (breaking)
+    for name in sorted(snap_names - curr_names):
+        changes.append(ToolChange(
+            tool_name=tool_name,
+            kind=ChangeKind.PARAM_REMOVED,
+            detail=f"parameter '{name}' removed",
+        ))
+
+    # Added parameters: breaking only when the new parameter is required
+    for name in sorted(curr_names - snap_names):
+        if name in curr_required:
+            changes.append(ToolChange(
+                tool_name=tool_name,
+                kind=ChangeKind.PARAM_ADDED_REQ,
+                detail=f"new required parameter '{name}'",
+            ))
+        else:
+            changes.append(ToolChange(
+                tool_name=tool_name,
+                kind=ChangeKind.PARAM_ADDED_OPT,
+                detail=f"new optional parameter '{name}'",
+            ))
+
+    # Changed parameters (only the "type" keyword is compared)
+    for name in sorted(snap_names & curr_names):
+        snap_type = snap_params[name].get("type")
+        curr_type = curr_params[name].get("type")
+        if snap_type != curr_type:
+            changes.append(ToolChange(
+                tool_name=tool_name,
+                kind=ChangeKind.PARAM_TYPE_CHANGED,
+                detail=f"parameter '{name}' type changed from '{snap_type}' to '{curr_type}'",
+            ))
+
+        # Parameter became required (breaking - it's a new constraint)
+        if name not in snap_required and name in curr_required:
+            changes.append(ToolChange(
+                tool_name=tool_name,
+                kind=ChangeKind.PARAM_ADDED_REQ,
+                detail=f"parameter '{name}' became required",
+            ))
+
+    return changes
+
+
+def diff_contract(
+    contract: MCPContract,
+    current_tools: List[Dict[str, Any]],
+) -> ContractDiff:
+    """Compare a saved contract against current tool definitions.
+
+    Args:
+        contract: The saved contract snapshot.
+        current_tools: Current tool definitions from the MCP server.
+
+    Returns:
+        ContractDiff with all detected changes.
+
+    Raises:
+        KeyError: If a current tool definition has no "name" key.
+    """
+    current_schemas = {t["name"]: ToolSchema.model_validate(t) for t in current_tools}
+    snapshot_schemas = {t.name: t for t in contract.tools}
+    changes: List[ToolChange] = []
+
+    # Removed tools (in snapshot but not in current)
+    for name in snapshot_schemas:
+        if name not in current_schemas:
+            changes.append(ToolChange(
+                tool_name=name,
+                kind=ChangeKind.REMOVED,
+                detail=f"tool '{name}' no longer available",
+            ))
+
+    # Added tools (in current but not in snapshot)
+    for name in current_schemas:
+        if name not in snapshot_schemas:
+            changes.append(ToolChange(
+                tool_name=name,
+                kind=ChangeKind.ADDED,
+                detail=f"new tool '{name}' available",
+            ))
+
+    # Changed tools (in both); sorted because set order varies between runs
+    for name in sorted(snapshot_schemas.keys() & current_schemas.keys()):
+        tool_changes = _diff_tool_schema(
+            name, snapshot_schemas[name], current_schemas[name]
+        )
+        changes.extend(tool_changes)
+
+    return ContractDiff(
+        server_name=contract.metadata.server_name,
+        changes=changes,
+        snapshot_tool_count=len(snapshot_schemas),
+        current_tool_count=len(current_schemas),
+    )
diff --git a/evalview/core/diff.py b/evalview/core/diff.py
index 95f2f0a..c623bf5 100644
--- a/evalview/core/diff.py
+++ b/evalview/core/diff.py
@@ -25,17 +25,19 @@ class DiffStatus(Enum):
     A test may have additional pass/fail criteria (cost limits, latency thresholds)
     beyond the diff status.
 
-    Four states with clear developer-friendly terminology:
+    Five states with clear developer-friendly terminology:
     - PASSED: Output and tools match within tolerance - safe to ship
     - TOOLS_CHANGED: Tool sequence differs - agent behavior shifted, review before deploy
     - OUTPUT_CHANGED: Same tools but output differs beyond threshold - review before deploy
     - REGRESSION: Score dropped significantly - likely a bug, fix before deploy
+    - CONTRACT_DRIFT: External MCP server interface changed - fix integration before deploy
     """
 
     PASSED = "passed"                # Output and tools match within tolerance
     TOOLS_CHANGED = "tools_changed"  # Tool sequence differs from golden
     OUTPUT_CHANGED = "output_changed"  # Output differs beyond similarity threshold
     REGRESSION = "regression"        # Score dropped >5 points from golden
+    CONTRACT_DRIFT = "contract_drift"  # External MCP server interface changed
 
 
 # Alias for backwards compatibility
diff --git a/evalview/core/mcp_contract.py b/evalview/core/mcp_contract.py
new file mode 100644
index 0000000..56aa890
--- /dev/null
+++ b/evalview/core/mcp_contract.py
@@ -0,0 +1,152 @@
+"""MCP contract storage and management.
+
+MCP contracts are snapshots of an external MCP server's tool definitions.
+When running tests with --contracts, current tool definitions are compared
+against the snapshot to detect interface drift before tests execute.
+
+This is the mirror of golden traces: golden traces detect when YOUR agent
+drifts, contracts detect when an EXTERNAL server drifts underneath you.
+
+Storage format:
+  .evalview/contracts/
+    <server-name>.contract.json    # The tool schema snapshot
+"""
+
+import json
+import hashlib
+from pathlib import Path
+from datetime import datetime
+from typing import Optional, List, Dict, Any
+from pydantic import BaseModel, Field
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class ToolSchema(BaseModel):
+    """Schema for a single MCP tool."""
+
+    name: str  # required; the only field without a default
+    description: str = ""  # empty when the tool definition omits it
+    inputSchema: Dict[str, Any] = Field(default_factory=dict)  # empty dict when omitted
+
+
+class ContractMetadata(BaseModel):
+    """Metadata about a contract snapshot."""
+
+    server_name: str  # identifier the contract is saved and looked up under
+    endpoint: str  # MCP server endpoint used for discovery
+    snapshot_at: datetime  # ContractStore.save_contract fills this with datetime.now()
+    protocol_version: str = "2024-11-05"  # NOTE(review): presumably the MCP protocol revision; confirm
+    tool_count: int = 0  # number of tools stored in the snapshot
+    notes: Optional[str] = None  # optional free-form notes about the snapshot
+    schema_hash: str = ""  # Hash of tool schemas for quick comparison
+
+
+class MCPContract(BaseModel):
+    """A contract snapshot from an MCP server: metadata plus its tool schemas."""
+
+    metadata: ContractMetadata  # required; describes where and when the snapshot was taken
+    tools: List[ToolSchema] = Field(default_factory=list)  # empty list when omitted
+
+    @property
+    def tool_names(self) -> List[str]:  # names in the same order as self.tools
+        return [t.name for t in self.tools]
+
+
+class ContractStore:
+    """Manages MCP contract storage and retrieval."""
+
+    def __init__(self, base_path: Optional[Path] = None):
+        self.base_path = base_path or Path(".")
+        self.contracts_dir = self.base_path / ".evalview" / "contracts"
+
+    def _safe_name(self, server_name: str) -> str:
+        return "".join(c if c.isalnum() or c in "._-" else "_" for c in server_name)
+
+    def _get_contract_path(self, server_name: str) -> Path:
+        return self.contracts_dir / f"{self._safe_name(server_name)}.contract.json"
+
+    def _hash_schemas(self, tools: List[Dict[str, Any]]) -> str:
+        """Hash tool schemas for quick drift comparison."""
+        canonical = json.dumps(tools, sort_keys=True, default=str)
+        return hashlib.sha256(canonical.encode()).hexdigest()[:16]
+
+    def save_contract(
+        self,
+        server_name: str,
+        endpoint: str,
+        tools: List[Dict[str, Any]],
+        notes: Optional[str] = None,
+    ) -> Path:
+        """Save a tool schema snapshot as a contract.
+
+        Args:
+            server_name: Human-readable server identifier.
+            endpoint: The MCP server endpoint used for discovery.
+            tools: Raw tool definitions from tools/list response.
+            notes: Optional notes about this snapshot.
+
+        Returns:
+            Path to saved contract file.
+        """
+        self.contracts_dir.mkdir(parents=True, exist_ok=True)
+
+        tool_schemas = [ToolSchema.model_validate(t) for t in tools]
+
+        contract = MCPContract(
+            metadata=ContractMetadata(
+                server_name=server_name,
+                endpoint=endpoint,
+                snapshot_at=datetime.now(),
+                tool_count=len(tool_schemas),
+                notes=notes,
+                schema_hash=self._hash_schemas(tools),
+            ),
+            tools=tool_schemas,
+        )
+
+        contract_path = self._get_contract_path(server_name)
+        with open(contract_path, "w") as f:
+            f.write(contract.model_dump_json(indent=2))
+
+        logger.info(f"Saved contract: {contract_path}")
+        return contract_path
+
+    def load_contract(self, server_name: str) -> Optional[MCPContract]:
+        """Load a contract by server name."""
+        contract_path = self._get_contract_path(server_name)
+        if not contract_path.exists():
+            return None
+
+        with open(contract_path) as f:
+            data = json.load(f)
+
+        return MCPContract.model_validate(data)
+
+    def has_contract(self, server_name: str) -> bool:
+        return self._get_contract_path(server_name).exists()
+
+    def list_contracts(self) -> List[ContractMetadata]:
+        """List all saved contracts."""
+        if not self.contracts_dir.exists():
+            return []
+
+        results = []
+        for path in self.contracts_dir.glob("*.contract.json"):
+            try:
+                with open(path) as f:
+                    data = json.load(f)
+                results.append(ContractMetadata.model_validate(data["metadata"]))
+            except Exception as e:
+                logger.warning(f"Failed to load contract {path}: {e}")
+
+        return results
+
+    def delete_contract(self, server_name: str) -> bool:
+        """Delete a contract."""
+        contract_path = self._get_contract_path(server_name)
+        if contract_path.exists():
+            contract_path.unlink()
+            return True
+        return False
diff --git a/examples/mcp/contract-testing.yaml b/examples/mcp/contract-testing.yaml
new file mode 100644
index 0000000..7caad1d
--- /dev/null
+++ b/examples/mcp/contract-testing.yaml
@@ -0,0 +1,42 @@
+# MCP Contract Testing Example
+#
+# This demonstrates evaluating an agent that uses an external MCP server
+# you don't control (Scenario 2). The workflow:
+#
+#   1. Snapshot the server:
+#      evalview mcp snapshot "npx:@modelcontextprotocol/server-filesystem /tmp" --name fs-server
+#
+#   2. Run this test with contract checking:
+#      evalview run examples/mcp/contract-testing.yaml --contracts --fail-on "REGRESSION,CONTRACT_DRIFT"
+#
+#   3. If the server's tools change, the run fails before this test even executes.
+#
+# This test validates that an AI agent correctly uses filesystem tools.
+# The contract ensures those tools still exist with the expected interface.
+
+name: "Agent uses filesystem MCP tools"
+description: "Test that the agent calls the right MCP tools for a file read task"
+
+adapter: mcp
+endpoint: "npx:@modelcontextprotocol/server-filesystem /tmp"  # same endpoint string as the snapshot command above
+
+# The contract snapshot for this server is checked when the run uses --contracts.
+# If the contract drifted, this test won't even run — saving time and
+# giving a clear error message instead of a confusing tool-not-found failure.
+
+input:
+  query: "read_file"
+  context:
+    arguments:
+      path: "/tmp/evalview-contract-test.txt"  # assumes this file exists and contains "hello" (see expected.output)
+
+expected:
+  tools:
+    - "read_file"
+  output:
+    contains:
+      - "hello"
+
+thresholds:
+  min_score: 50
+  max_latency: 10000  # NOTE(review): presumably milliseconds; confirm
diff --git a/tests/test_mcp_contracts.py b/tests/test_mcp_contracts.py
new file mode 100644
index 0000000..5f51c9d
--- /dev/null
+++ b/tests/test_mcp_contracts.py
@@ -0,0 +1,495 @@
+"""Tests for MCP contract testing: storage, diff engine, and integration."""
+
+import json
+import pytest
+from datetime import datetime
+
+from evalview.core.mcp_contract import (
+    ContractStore,
+    MCPContract,
+    ContractMetadata,
+    ToolSchema,
+)
+from evalview.core.contract_diff import (
+    diff_contract,
+    ContractDiff,
+    ContractDriftStatus,
+    ChangeKind,
+    ToolChange,
+    BREAKING_CHANGES,
+    _diff_tool_schema,
+)
+
+
+# ============================================================================
+# Test Data
+# ============================================================================
+
+SAMPLE_TOOLS = [  # raw tool dicts (name / description / inputSchema) shared by the tests
+    {
+        "name": "create_issue",
+        "description": "Create a new issue in a GitHub repository",
+        "inputSchema": {
+            "type": "object",
+            "properties": {
+                "repo": {"type": "string"},
+                "title": {"type": "string"},
+                "body": {"type": "string"},
+            },
+            "required": ["repo", "title"],  # "body" is optional
+        },
+    },
+    {
+        "name": "list_issues",
+        "description": "List issues in a repository",
+        "inputSchema": {
+            "type": "object",
+            "properties": {
+                "repo": {"type": "string"},
+                "state": {"type": "string"},
+            },
+            "required": ["repo"],  # "state" is optional
+        },
+    },
+    {
+        "name": "read_file",
+        "description": "Read a file from the filesystem",
+        "inputSchema": {
+            "type": "object",
+            "properties": {
+                "path": {"type": "string"},
+            },
+            "required": ["path"],
+        },
+    },
+]
+
+
+@pytest.fixture
+def contract_store(tmp_path):
+    """Create a ContractStore rooted at pytest's per-test tmp_path directory."""
+    return ContractStore(base_path=tmp_path)
+
+
+@pytest.fixture
+def saved_contract(contract_store):
+    """Create and save a sample contract."""
+    contract_store.save_contract(
+        server_name="test-server",
+        endpoint="npx:@test/server",
+        tools=SAMPLE_TOOLS,
+        notes="Test snapshot",
+    )
+    return contract_store.load_contract("test-server")
+
+
+# ============================================================================
+# ContractStore Tests
+# ============================================================================
+
+
+class TestContractStore:
+    """Tests for MCP contract storage."""
+
+    def test_save_and_load(self, contract_store):
+        """Save a contract and load it back."""
+        path = contract_store.save_contract(
+            server_name="my-server",
+            endpoint="http://localhost:8080",
+            tools=SAMPLE_TOOLS,
+            notes="Initial snapshot",
+        )
+
+        assert path.exists()
+        assert path.suffix == ".json"
+
+        loaded = contract_store.load_contract("my-server")
+        assert loaded is not None
+        assert loaded.metadata.server_name == "my-server"
+        assert loaded.metadata.endpoint == "http://localhost:8080"
+        assert loaded.metadata.tool_count == 3
+        assert loaded.metadata.notes == "Initial snapshot"
+        assert len(loaded.tools) == 3
+
+    def test_tool_names(self, saved_contract):
+        """Contract exposes tool names."""
+        assert saved_contract.tool_names == ["create_issue", "list_issues", "read_file"]
+
+    def test_load_nonexistent(self, contract_store):
+        """Loading a nonexistent contract returns None."""
+        assert contract_store.load_contract("does-not-exist") is None
+
+    def test_has_contract(self, contract_store):
+        """has_contract returns correct boolean."""
+        assert not contract_store.has_contract("my-server")
+
+        contract_store.save_contract(
+            server_name="my-server",
+            endpoint="http://localhost:8080",
+            tools=SAMPLE_TOOLS,
+        )
+
+        assert contract_store.has_contract("my-server")
+
+    def test_list_contracts(self, contract_store):
+        """List all saved contracts."""
+        assert contract_store.list_contracts() == []
+
+        contract_store.save_contract("server-a", "http://a", SAMPLE_TOOLS[:1])
+        contract_store.save_contract("server-b", "http://b", SAMPLE_TOOLS[:2])
+
+        contracts = contract_store.list_contracts()
+        assert len(contracts) == 2
+        names = {c.server_name for c in contracts}
+        assert names == {"server-a", "server-b"}
+
+    def test_delete_contract(self, contract_store):
+        """Delete a contract."""
+        contract_store.save_contract("my-server", "http://localhost", SAMPLE_TOOLS)
+        assert contract_store.has_contract("my-server")
+
+        result = contract_store.delete_contract("my-server")
+        assert result is True
+        assert not contract_store.has_contract("my-server")
+
+    def test_delete_nonexistent(self, contract_store):
+        """Deleting a nonexistent contract returns False."""
+        assert contract_store.delete_contract("nope") is False
+
+    def test_overwrite(self, contract_store):
+        """Overwriting a contract replaces it."""
+        contract_store.save_contract("my-server", "http://old", SAMPLE_TOOLS[:1])
+        contract_store.save_contract("my-server", "http://new", SAMPLE_TOOLS[:2])
+
+        loaded = contract_store.load_contract("my-server")
+        assert loaded.metadata.endpoint == "http://new"
+        assert loaded.metadata.tool_count == 2
+
+    def test_schema_hash_changes_on_different_tools(self, contract_store):
+        """Different tool sets produce different schema hashes."""
+        contract_store.save_contract("server-a", "http://a", SAMPLE_TOOLS[:1])
+        contract_store.save_contract("server-b", "http://b", SAMPLE_TOOLS[:2])
+
+        a = contract_store.load_contract("server-a")
+        b = contract_store.load_contract("server-b")
+        assert a.metadata.schema_hash != b.metadata.schema_hash
+
+    def test_safe_name_sanitization(self, contract_store):
+        """Server names with special chars are sanitized for filesystem."""
+        contract_store.save_contract(
+            "my server/with:special chars!",
+            "http://x",
+            SAMPLE_TOOLS[:1],
+        )
+        assert contract_store.has_contract("my server/with:special chars!")
+
+    def test_metadata_timestamp(self, contract_store):
+        """Saved contract has a valid timestamp."""
+        contract_store.save_contract("ts-test", "http://x", SAMPLE_TOOLS[:1])
+        loaded = contract_store.load_contract("ts-test")
+        assert isinstance(loaded.metadata.snapshot_at, datetime)
+
+
+# ============================================================================
+# Contract Diff Engine Tests
+# ============================================================================
+
+
+class TestContractDiff:
+    """Tests for the schema diff engine."""
+
+    def test_no_changes(self, saved_contract):
+        """Identical tools produce no changes."""
+        result = diff_contract(saved_contract, SAMPLE_TOOLS)
+
+        assert result.status == ContractDriftStatus.PASSED
+        assert result.changes == []
+        assert not result.has_breaking_changes
+        assert result.summary() == "No changes"
+
+    def test_tool_removed(self, saved_contract):
+        """Removing a tool is a breaking change."""
+        current = [t for t in SAMPLE_TOOLS if t["name"] != "read_file"]
+        result = diff_contract(saved_contract, current)
+
+        assert result.status == ContractDriftStatus.CONTRACT_DRIFT
+        assert result.has_breaking_changes
+
+        removed = [c for c in result.changes if c.kind == ChangeKind.REMOVED]
+        assert len(removed) == 1
+        assert removed[0].tool_name == "read_file"
+        assert removed[0].is_breaking
+
+    def test_tool_added(self, saved_contract):
+        """Adding a tool is informational (not breaking)."""
+        current = SAMPLE_TOOLS + [{
+            "name": "delete_file",
+            "description": "Delete a file",
+            "inputSchema": {"type": "object", "properties": {}},
+        }]
+        result = diff_contract(saved_contract, current)
+
+        assert result.status == ContractDriftStatus.PASSED  # Not breaking
+        assert len(result.changes) == 1
+        assert result.changes[0].kind == ChangeKind.ADDED
+        assert result.changes[0].tool_name == "delete_file"
+        assert not result.changes[0].is_breaking
+
+    def test_required_param_added(self, saved_contract):
+        """Adding a new required parameter is breaking."""
+        current = json.loads(json.dumps(SAMPLE_TOOLS))  # deep copy via JSON round-trip
+        current[0]["inputSchema"]["properties"]["owner"] = {"type": "string"}
+        current[0]["inputSchema"]["required"].append("owner")
+
+        result = diff_contract(saved_contract, current)
+
+        assert result.status == ContractDriftStatus.CONTRACT_DRIFT
+        breaking = result.breaking_changes
+        assert any(
+            c.kind == ChangeKind.PARAM_ADDED_REQ and "owner" in c.detail
+            for c in breaking
+        )
+
+    def test_optional_param_added(self, saved_contract):
+        """Adding a new optional parameter is not breaking."""
+        current = json.loads(json.dumps(SAMPLE_TOOLS))
+        current[0]["inputSchema"]["properties"]["labels"] = {"type": "array"}
+        # Not added to "required"
+
+        result = diff_contract(saved_contract, current)
+
+        assert result.status == ContractDriftStatus.PASSED
+        info = result.informational_changes
+        assert any(c.kind == ChangeKind.PARAM_ADDED_OPT for c in info)
+
+    def test_param_removed(self, saved_contract):
+        """Removing a parameter is breaking."""
+        current = json.loads(json.dumps(SAMPLE_TOOLS))
+        del current[0]["inputSchema"]["properties"]["body"]
+
+        result = diff_contract(saved_contract, current)
+
+        assert result.status == ContractDriftStatus.CONTRACT_DRIFT
+        assert any(
+            c.kind == ChangeKind.PARAM_REMOVED and "body" in c.detail
+            for c in result.breaking_changes
+        )
+
+    def test_param_type_changed(self, saved_contract):
+        """Changing a parameter type is breaking."""
+        current = json.loads(json.dumps(SAMPLE_TOOLS))
+        current[1]["inputSchema"]["properties"]["repo"]["type"] = "integer"
+
+        result = diff_contract(saved_contract, current)
+
+        assert result.status == ContractDriftStatus.CONTRACT_DRIFT
+        assert any(
+            c.kind == ChangeKind.PARAM_TYPE_CHANGED
+            for c in result.breaking_changes
+        )
+
+    def test_description_changed(self, saved_contract):
+        """Description change is informational."""
+        current = json.loads(json.dumps(SAMPLE_TOOLS))
+        current[0]["description"] = "Updated description for create_issue"
+
+        result = diff_contract(saved_contract, current)
+
+        assert result.status == ContractDriftStatus.PASSED
+        assert any(c.kind == ChangeKind.DESCRIPTION_CHANGED for c in result.changes)
+
+    def test_param_became_required(self, saved_contract):
+        """A parameter becoming required is breaking."""
+        current = json.loads(json.dumps(SAMPLE_TOOLS))
+        # "body" was optional, make it required
+        current[0]["inputSchema"]["required"].append("body")
+
+        result = diff_contract(saved_contract, current)
+
+        assert result.status == ContractDriftStatus.CONTRACT_DRIFT
+        assert any(
+            c.kind == ChangeKind.PARAM_ADDED_REQ and "body" in c.detail
+            and "became required" in c.detail
+            for c in result.breaking_changes
+        )
+
+    def test_multiple_changes(self, saved_contract):
+        """Multiple changes across tools are all detected."""
+        current = json.loads(json.dumps(SAMPLE_TOOLS))
+        # Remove read_file
+        current = [t for t in current if t["name"] != "read_file"]
+        # Add required param to list_issues (still index 1 after the removal)
+        current[1]["inputSchema"]["properties"]["owner"] = {"type": "string"}
+        current[1]["inputSchema"]["required"].append("owner")
+        # Add new tool
+        current.append({
+            "name": "merge_pr",
+            "description": "Merge a pull request",
+            "inputSchema": {"type": "object", "properties": {}},
+        })
+
+        result = diff_contract(saved_contract, current)
+
+        assert result.status == ContractDriftStatus.CONTRACT_DRIFT
+        assert result.snapshot_tool_count == 3
+        assert result.current_tool_count == 3  # removed 1, added 1
+
+        kinds = {c.kind for c in result.changes}
+        assert ChangeKind.REMOVED in kinds
+        assert ChangeKind.ADDED in kinds
+        assert ChangeKind.PARAM_ADDED_REQ in kinds
+
+    def test_summary_no_changes(self, saved_contract):
+        """Summary for no changes."""
+        result = diff_contract(saved_contract, SAMPLE_TOOLS)
+        assert result.summary() == "No changes"
+
+    def test_summary_with_breaking_only(self, saved_contract):
+        """Summary with breaking changes only."""
+        current = [t for t in SAMPLE_TOOLS if t["name"] != "read_file"]
+        result = diff_contract(saved_contract, current)
+        assert result.summary() == "1 breaking change(s)"
+
+    def test_summary_with_mixed_changes(self, saved_contract):
+        """Summary with both breaking and informational changes."""
+        current = json.loads(json.dumps(SAMPLE_TOOLS))
+        # Remove a tool (breaking) and add one (informational)
+        current = [t for t in current if t["name"] != "read_file"]
+        current.append({
+            "name": "new_tool",
+            "description": "A new tool",
+            "inputSchema": {"type": "object", "properties": {}},
+        })
+        result = diff_contract(saved_contract, current)
+        summary = result.summary()
+        assert "1 breaking change(s)" in summary
+        assert "1 informational change(s)" in summary
+
+    def test_duplicate_tool_names_in_current(self, saved_contract):
+        """Duplicate tool names in current tools are deduplicated silently."""
+        current = SAMPLE_TOOLS + [SAMPLE_TOOLS[0]]  # Duplicate create_issue
+        result = diff_contract(saved_contract, current)
+        # diff_contract keys current tools by name, so the duplicate collapses
+        assert result.status == ContractDriftStatus.PASSED
+
+    def test_empty_snapshot_vs_tools(self):
+        """Empty snapshot vs populated tools shows all as added."""
+        contract = MCPContract(
+            metadata=ContractMetadata(
+                server_name="empty",
+                endpoint="http://x",
+                snapshot_at=datetime.now(),
+            ),
+            tools=[],
+        )
+        result = diff_contract(contract, SAMPLE_TOOLS)
+
+        assert result.status == ContractDriftStatus.PASSED  # All added = not breaking
+        assert all(c.kind == ChangeKind.ADDED for c in result.changes)
+        assert len(result.changes) == 3
+
+    def test_all_tools_removed(self, saved_contract):
+        """All tools removed is breaking."""
+        result = diff_contract(saved_contract, [])
+
+        assert result.status == ContractDriftStatus.CONTRACT_DRIFT
+        assert all(c.kind == ChangeKind.REMOVED for c in result.changes)
+        assert len(result.changes) == 3
+
+
+# ============================================================================
+# Breaking Changes Classification Tests
+# ============================================================================
+
+
+class TestBreakingChanges:
+    """Test that the right change kinds are classified as breaking."""
+
+    def test_breaking_set(self):
+        """Verify the breaking changes set."""
+        assert ChangeKind.REMOVED in BREAKING_CHANGES
+        assert ChangeKind.PARAM_ADDED_REQ in BREAKING_CHANGES
+        assert ChangeKind.PARAM_REMOVED in BREAKING_CHANGES
+        assert ChangeKind.PARAM_TYPE_CHANGED in BREAKING_CHANGES
+
+    def test_non_breaking_set(self):
+        """Verify non-breaking changes."""
+        assert ChangeKind.ADDED not in BREAKING_CHANGES
+        assert ChangeKind.PARAM_ADDED_OPT not in BREAKING_CHANGES
+        assert ChangeKind.DESCRIPTION_CHANGED not in BREAKING_CHANGES
+
+    def test_tool_change_is_breaking(self):
+        """ToolChange.is_breaking reflects its kind."""
+        breaking = ToolChange("t", ChangeKind.REMOVED, "gone")
+        assert breaking.is_breaking
+
+        safe = ToolChange("t", ChangeKind.ADDED, "new")
+        assert not safe.is_breaking
+
+
+# ============================================================================
+# DiffStatus Integration Tests
+# ============================================================================
+
+
+class TestDiffStatusIntegration:
+    """CONTRACT_DRIFT must exist in both DiffStatus and ContractDriftStatus."""
+
+    def test_contract_drift_in_diff_status(self):
+        """CONTRACT_DRIFT is available in the main DiffStatus enum."""
+        from evalview.core.diff import DiffStatus
+
+        assert hasattr(DiffStatus, "CONTRACT_DRIFT")
+        assert DiffStatus.CONTRACT_DRIFT.value == "contract_drift"
+
+    def test_contract_drift_status_enum(self):
+        """ContractDriftStatus has the expected values."""
+        assert ContractDriftStatus.PASSED.value == "passed"
+        assert ContractDriftStatus.CONTRACT_DRIFT.value == "contract_drift"  # same string as DiffStatus.CONTRACT_DRIFT
+
+
+# ============================================================================
+# Tool Schema Diff Tests (unit level)
+# ============================================================================
+
+
+class TestToolSchemaDiff:
+    """Unit tests for _diff_tool_schema."""
+
+    def test_identical_schemas(self):
+        """No changes when a populated schema is diffed against itself."""
+        schema = ToolSchema(
+            name="test",
+            description="A test tool",
+            inputSchema={
+                "type": "object",
+                "properties": {"x": {"type": "string"}},
+                "required": ["x"],
+            },
+        )
+        changes = _diff_tool_schema("test", schema, schema)
+        assert changes == []
+
+    def test_empty_schemas(self):
+        """No changes when an empty schema is diffed against itself."""
+        schema = ToolSchema(name="test", description="", inputSchema={})
+        changes = _diff_tool_schema("test", schema, schema)
+        assert changes == []
+
+    def test_schema_with_no_properties(self):
+        """Handle a snapshot schema that has no "properties" key."""
+        snap = ToolSchema(
+            name="test",
+            description="old",
+            inputSchema={"type": "object"},
+        )
+        curr = ToolSchema(
+            name="test",
+            description="new",
+            inputSchema={"type": "object", "properties": {"x": {"type": "string"}}},
+        )
+        changes = _diff_tool_schema("test", snap, curr)
+        # Should detect description change and new optional param ("x" is not in "required")
+        kinds = {c.kind for c in changes}
+        assert ChangeKind.DESCRIPTION_CHANGED in kinds
+        assert ChangeKind.PARAM_ADDED_OPT in kinds