hidai25 · hidai25 · Feb 7, 2026 · Feb 7, 2026 · Feb 7, 2026 · Feb 7, 2026
diff --git a/action.yml b/action.yml
@@ -37,9 +37,13 @@ inputs:
     required: false
     default: 'false'
   fail-on:
-    description: 'Comma-separated statuses that fail CI (REGRESSION, TOOLS_CHANGED, OUTPUT_CHANGED)'
+    description: 'Comma-separated statuses that fail CI (REGRESSION, TOOLS_CHANGED, OUTPUT_CHANGED, CONTRACT_DRIFT)'
     required: false
     default: 'REGRESSION'
+  contracts:
+    description: 'Check MCP contracts for interface drift before running tests'
+    required: false
+    default: 'false'
   generate-report:
     description: 'Generate HTML report'
     required: false
@@ -102,7 +106,17 @@ runs:
 
         # Regression detection mode
         if [ "${{ inputs.diff }}" = "true" ]; then
-          CMD="$CMD --diff --fail-on ${{ inputs.fail-on }}"
+          CMD="$CMD --diff"
+        fi
+
+        # MCP contract drift detection
+        if [ "${{ inputs.contracts }}" = "true" ]; then
+          CMD="$CMD --contracts"
+        fi
+
+        # Set fail-on (applies to both diff and contract statuses)
+        if [ "${{ inputs.diff }}" = "true" ] || [ "${{ inputs.contracts }}" = "true" ]; then
+          CMD="$CMD --fail-on ${{ inputs.fail-on }}"
         fi
 
         if [ "${{ inputs.generate-report }}" = "true" ]; then

diff --git a/docs/MCP_CONTRACTS.md b/docs/MCP_CONTRACTS.md
@@ -0,0 +1,228 @@
+# MCP Contract Testing
+
+Detect when external MCP servers change their interface before your agent breaks.
+
+## The Problem
+
+When you use MCP servers you don't own (Scenario 2), the server can change its
+tool definitions at any time: rename parameters, remove tools, add required fields.
+Your agent tests pass today and fail tomorrow — not because your code changed,
+but because the server did.
+
+## The Solution
+
+MCP contract testing captures a snapshot of a server's tool definitions and diffs
+against it on every CI run. If the interface changed, you know immediately — before
+running your full test suite.
+
+This mirrors EvalView's golden baseline system:
+- **Golden traces** detect when your agent's *behavior* drifts
+- **MCP contracts** detect when an external server's *interface* drifts
+
+## Quick Start
+
+### 1. Snapshot a server
+
+```bash
+evalview mcp snapshot "npx:@modelcontextprotocol/server-github" --name server-github
+```
+
+Output:
+```
+Snapshot saved: .evalview/contracts/server-github.contract.json
+Tools discovered: 8
+  - create_issue
+  - list_issues
+  - create_pull_request
+  - ...
+```
+
+### 2. Check for drift
+
+```bash
+evalview mcp check server-github
+```
+
+If the server changed:
+```
+CONTRACT_DRIFT - 2 breaking change(s)
+  REMOVED: create_pull_request - tool 'create_pull_request' no longer available
+  CHANGED: list_issues - new required parameter 'owner'
+```
+
+### 3. Use in CI
+
+```bash
+evalview run tests/ --contracts --fail-on "REGRESSION,CONTRACT_DRIFT"
+```
+
+The `--contracts` flag checks all saved contracts *before* running tests.
+If any contract drifted, the run aborts immediately — no point testing against
+a broken interface.
+
+## CLI Reference
+
+### `evalview mcp snapshot`
+
+Capture tool definitions from an MCP server.
+
+```bash
+evalview mcp snapshot <endpoint> --name <server-name> [--notes "..."] [--timeout 30]
+```
+
+| Argument | Description |
+|----------|-------------|
+| `endpoint` | MCP server endpoint (e.g., `npx:@modelcontextprotocol/server-github`) |
+| `--name` | Human-readable identifier for this contract (required) |
+| `--notes` | Optional notes about this snapshot |
+| `--timeout` | Connection timeout in seconds (default: 30) |
+
+Supports all MCP transport types:
+- **stdio (npx package)**: `"npx:@modelcontextprotocol/server-filesystem /tmp"`
+- **HTTP**: `"http://localhost:8080"`
+- **stdio (custom command)**: `"stdio:python my_server.py"`
+
+### `evalview mcp check`
+
+Compare current server interface against a saved contract.
+
+```bash
+evalview mcp check <name> [--endpoint <override>] [--timeout 30]
+```
+
+| Argument | Description |
+|----------|-------------|
+| `name` | Contract name (from `--name` in snapshot) |
+| `--endpoint` | Override endpoint (default: use endpoint from snapshot) |
+
+Exit codes:
+- `0` — No breaking changes
+- `1` — Breaking changes detected (CONTRACT_DRIFT)
+- `2` — Could not connect to server
+
+### `evalview mcp list`
+
+List all saved contracts.
+
+```bash
+evalview mcp list
+```
+
+### `evalview mcp show`
+
+Show full details of a contract including all tool schemas.
+
+```bash
+evalview mcp show <name>
+```
+
+### `evalview mcp delete`
+
+Remove a contract.
+
+```bash
+evalview mcp delete <name> [--force]
+```
+
+## Integration with `evalview run`
+
+The `--contracts` flag adds a pre-flight check to any test run:
+
+```bash
+evalview run tests/ --contracts
+```
+
+This checks all contracts in `.evalview/contracts/` before running tests.
+Combine with `--fail-on CONTRACT_DRIFT` to fail CI on drift:
+
+```bash
+evalview run tests/ --contracts --fail-on "REGRESSION,CONTRACT_DRIFT"
+```
+
+Or use `--strict` (now includes CONTRACT_DRIFT):
+
+```bash
+evalview run tests/ --contracts --strict
+```
+
+## GitHub Actions
+
+```yaml
+- name: Run EvalView
+  uses: hidai25/eval-view@v0.2.1
+  with:
+    diff: true
+    contracts: true
+    fail-on: 'REGRESSION,CONTRACT_DRIFT'
+```
+
+## What Gets Detected
+
+### Breaking changes (trigger CONTRACT_DRIFT)
+
+| Change | Example |
+|--------|---------|
+| Tool removed | `create_pull_request` no longer exists |
+| Required parameter added | New required param `owner` on `list_issues` |
+| Parameter removed | `repo` param no longer accepted |
+| Parameter type changed | `limit` changed from `string` to `integer` |
+| Parameter became required | `owner` was optional, now required |
+
+### Informational changes (logged, don't fail)
+
+| Change | Example |
+|--------|---------|
+| New tool added | `merge_pull_request` now available |
+| Optional parameter added | New optional param `labels` on `create_issue` |
+| Description changed | Tool description updated |
+
+## Contract File Format
+
+Contracts are stored as JSON in `.evalview/contracts/`:
+
+```json
+{
+  "metadata": {
+    "server_name": "server-github",
+    "endpoint": "npx:@modelcontextprotocol/server-github",
+    "snapshot_at": "2026-02-07T10:30:00",
+    "protocol_version": "2024-11-05",
+    "tool_count": 8,
+    "schema_hash": "a1b2c3d4e5f67890"
+  },
+  "tools": [
+    {
+      "name": "create_issue",
+      "description": "Create a new issue in a GitHub repository",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "repo": { "type": "string" },
+          "title": { "type": "string" },
+          "body": { "type": "string" }
+        },
+        "required": ["repo", "title"]
+      }
+    }
+  ]
+}
+```
+
+Commit these files to your repo so CI can use them.
+
+## Best Practices
+
+1. **Snapshot after verifying** — Run your tests first, confirm everything works,
+   then snapshot. The contract represents a known-good interface.
+
+2. **Refresh periodically** — If a contract is more than 30 days old,
+   `evalview mcp check` will warn you. Re-snapshot to accept intentional changes.
+
+3. **One contract per server** — Name contracts after the server, not the tools:
+   use `server-github`, not `create-issue-tool`.
+
+4. **Commit contracts** — Store `.evalview/contracts/` in git. They're small JSON
+   files and CI needs them.
+
+5. **Check before testing** — Use `--contracts` on `evalview run` so drift is
+   caught before wasting time on tests that will fail anyway.
diff --git a/evalview/adapters/mcp_adapter.py b/evalview/adapters/mcp_adapter.py
@@ -449,6 +449,107 @@ def _create_error_trace(
             trace_context=tracer.build_trace_context(),
         )
 
+    async def discover_tools(self) -> List[Dict[str, Any]]:
+        """Discover available tools from an MCP server.
+
+        Connects to the server, initializes the session, and calls tools/list
+        to retrieve all available tool definitions (name, description, inputSchema).
+
+        Returns:
+            List of tool definition dicts from the MCP server.
+
+        Raises:
+            Exception: If connection or discovery fails.
+        """
+        transport, target = self._parse_endpoint()
+
+        if transport == "stdio":
+            return await self._discover_tools_stdio(target)
+        else:
+            return await self._discover_tools_http(target)
+
+    async def _discover_tools_stdio(self, command: str) -> List[Dict[str, Any]]:
+        """Discover tools via stdio transport."""
+        import shlex
+
+        cmd_parts = shlex.split(command)
+        process = await asyncio.create_subprocess_exec(
+            *cmd_parts,
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+
+        try:
+            await self._send_request(
+                process,
+                "initialize",
+                {
+                    "protocolVersion": "2024-11-05",
+                    "capabilities": {},
+                    "clientInfo": {"name": "evalview", "version": "0.1.7"},
+                },
+            )
+            await self._send_notification(process, "notifications/initialized", {})
+
+            result = await self._send_request(process, "tools/list", {})
+            return result.get("tools", [])
+        finally:
+            process.terminate()
+            await process.wait()
+
+    async def _discover_tools_http(self, url: str) -> List[Dict[str, Any]]:
+        """Discover tools via HTTP transport."""
+        import httpx
+
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            # Initialize session
+            self._request_id += 1
+            init_response = await client.post(
+                url,
+                json={
+                    "jsonrpc": "2.0",
+                    "id": self._request_id,
+                    "method": "initialize",
+                    "params": {
+                        "protocolVersion": "2024-11-05",
+                        "capabilities": {},
+                        "clientInfo": {"name": "evalview", "version": "0.1.7"},
+                    },
+                },
+            )
+            init_result = init_response.json()
+            if "error" in init_result:
+                raise Exception(f"MCP initialize error: {init_result['error']}")
+
+            # Send initialized notification (required by MCP protocol)
+            await client.post(
+                url,
+                json={
+                    "jsonrpc": "2.0",
+                    "method": "notifications/initialized",
+                    "params": {},
+                },
+            )
+
+            # Request tool list
+            self._request_id += 1
+            response = await client.post(
+                url,
+                json={
+                    "jsonrpc": "2.0",
+                    "id": self._request_id,
+                    "method": "tools/list",
+                    "params": {},
+                },
+            )
+            result = response.json()
+
+            if "error" in result:
+                raise Exception(f"MCP tools/list error: {result['error']}")
+
+            return result.get("result", {}).get("tools", [])
+
     async def health_check(self) -> bool:
         """Check if MCP server is reachable."""
         transport, target = self._parse_endpoint()