From 3cdec9e15df77c07aa8852c99afd38019ac049ec Mon Sep 17 00:00:00 2001
From: Wang Siyuan <wsy0227@sjtu.edu.cn>
Date: Tue, 9 Dec 2025 14:53:57 +0800
Subject: [PATCH 1/8] mcp: add http transport and quiet shutdown

---
 README.md                  | 13 +++++-
 docs/contributing.md       |  1 +
 docs/getting-started.md    | 10 +++++
 src/keep_gpu/mcp/server.py | 89 +++++++++++++++++++++++++++++++++++---
 4 files changed, 105 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index a92cc77..e7caa80 100644
--- a/README.md
+++ b/README.md
@@ -92,10 +92,14 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=90, busy
 
 ### MCP endpoint (experimental)
 
-- Start a simple JSON-RPC server on stdin/stdout:
+- Start a simple JSON-RPC server on stdin/stdout (default):
   ```bash
   keep-gpu-mcp-server
   ```
+- Or expose it over HTTP (JSON-RPC 2.0 over POST):
+  ```bash
+  keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765
+  ```
 - Example request (one per line):
   ```json
   {"id": 1, "method": "start_keep", "params": {"gpu_ids": [0], "vram": "512MB", "interval": 60, "busy_threshold": 20}}
@@ -108,6 +112,13 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=90, busy
       command: ["keep-gpu-mcp-server"]
       adapter: stdio
   ```
+- Minimal client config (HTTP MCP):
+  ```yaml
+  servers:
+    keepgpu:
+      url: http://127.0.0.1:8765/
+      adapter: http
+  ```
 
 ## Contributing
 
diff --git a/docs/contributing.md b/docs/contributing.md
index 9754a33..e02de44 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -42,6 +42,7 @@ expectations so you can get productive quickly and avoid surprises in CI.
 ## MCP server (experimental)
 
 - Start: `keep-gpu-mcp-server` (stdin/stdout JSON-RPC)
+- HTTP option: `keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765`
 - Methods: `start_keep`, `stop_keep`, `status`, `list_gpus`
 - Example request:
   ```json
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 00b8f55..8ff6ae9 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -77,6 +77,16 @@ servers:
 Tools exposed: `start_keep`, `stop_keep`, `status`, `list_gpus`. Each request is
 a single JSON line; see above for an example payload.
 
+### HTTP transport
+
+Prefer TCP instead of stdio? Run:
+
+```bash
+keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765
+```
+
+And point your MCP client at `http://127.0.0.1:8765/` (JSON-RPC 2.0 over POST).
+
 === "Editable dev install"
     ```bash
     git clone https://github.com/Wangmerlyn/KeepGPU.git
diff --git a/src/keep_gpu/mcp/server.py b/src/keep_gpu/mcp/server.py
index 47a06f4..f2ccb56 100644
--- a/src/keep_gpu/mcp/server.py
+++ b/src/keep_gpu/mcp/server.py
@@ -1,11 +1,12 @@
 """
 Minimal MCP-style JSON-RPC server for KeepGPU.
 
-The server reads JSON lines from stdin and writes JSON responses to stdout.
+Run over stdin/stdout (default) or a lightweight HTTP server.
 Supported methods:
   - start_keep(gpu_ids, vram, interval, busy_threshold, job_id)
   - stop_keep(job_id=None)  # None stops all
   - status(job_id=None)     # None lists all
+  - list_gpus()
 """
 
 from __future__ import annotations
@@ -14,6 +15,10 @@
 import json
 import sys
 import uuid
+import argparse
+import threading
+from http.server import BaseHTTPRequestHandler
+from socketserver import TCPServer
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional
 
@@ -70,12 +75,15 @@ def start_keep(
         logger.info("Started keep session %s on GPUs %s", job_id, gpu_ids)
         return {"job_id": job_id}
 
-    def stop_keep(self, job_id: Optional[str] = None) -> Dict[str, Any]:
+    def stop_keep(
+        self, job_id: Optional[str] = None, quiet: bool = False
+    ) -> Dict[str, Any]:
         if job_id:
             session = self._sessions.pop(job_id, None)
             if session:
                 session.controller.release()
-                logger.info("Stopped keep session %s", job_id)
+                if not quiet:
+                    logger.info("Stopped keep session %s", job_id)
                 return {"stopped": [job_id]}
             return {"stopped": [], "message": "job_id not found"}
 
@@ -83,7 +91,7 @@ def stop_keep(self, job_id: Optional[str] = None) -> Dict[str, Any]:
         for job_id in stopped_ids:
             session = self._sessions.pop(job_id)
             session.controller.release()
-        if stopped_ids:
+        if stopped_ids and not quiet:
             logger.info("Stopped sessions: %s", stopped_ids)
         return {"stopped": stopped_ids}
 
@@ -111,7 +119,7 @@ def list_gpus(self) -> Dict[str, Any]:
 
     def shutdown(self) -> None:
         try:
-            self.stop_keep(None)
+            self.stop_keep(None, quiet=True)
         except Exception:  # pragma: no cover - defensive
             # Avoid noisy errors during interpreter teardown
             return
@@ -138,8 +146,31 @@ def _handle_request(server: KeepGPUServer, payload: Dict[str, Any]) -> Dict[str,
         return {"id": req_id, "error": {"message": str(exc)}}
 
 
-def main() -> None:
-    server = KeepGPUServer()
+class _JSONRPCHandler(BaseHTTPRequestHandler):
+    server_version = "KeepGPU-MCP/0.1"
+
+    def do_POST(self):  # noqa: N802
+        length = int(self.headers.get("content-length", "0"))
+        body = self.rfile.read(length).decode()
+        try:
+            payload = json.loads(body)
+            response = _handle_request(self.server.keepgpu_server, payload)  # type: ignore[attr-defined]
+            status = 200
+        except Exception as exc:  # pragma: no cover - defensive
+            response = {"error": {"message": str(exc)}}
+            status = 400
+        data = json.dumps(response).encode()
+        self.send_response(status)
+        self.send_header("content-type", "application/json")
+        self.send_header("content-length", str(len(data)))
+        self.end_headers()
+        self.wfile.write(data)
+
+    def log_message(self, format, *args):  # noqa: A003
+        return
+
+
+def run_stdio(server: KeepGPUServer) -> None:
     for line in sys.stdin:
         line = line.strip()
         if not line:
@@ -153,5 +184,49 @@ def main() -> None:
         sys.stdout.flush()
 
 
+def run_http(server: KeepGPUServer, host: str = "127.0.0.1", port: int = 8765) -> None:
+    class _Server(TCPServer):
+        allow_reuse_address = True
+
+    httpd = _Server((host, port), _JSONRPCHandler)
+    httpd.keepgpu_server = server  # type: ignore[attr-defined]
+
+    def _serve():
+        httpd.serve_forever()
+
+    thread = threading.Thread(target=_serve, daemon=True)
+    thread.start()
+    logger.info(
+        "MCP HTTP server listening on http://%s:%s", host, httpd.server_address[1]
+    )
+    try:
+        thread.join()
+    except KeyboardInterrupt:
+        pass
+    finally:
+        httpd.shutdown()
+        httpd.server_close()
+        server.shutdown()
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="KeepGPU MCP server")
+    parser.add_argument(
+        "--mode",
+        choices=["stdio", "http"],
+        default="stdio",
+        help="Transport mode (default: stdio)",
+    )
+    parser.add_argument("--host", default="127.0.0.1", help="HTTP host (http mode)")
+    parser.add_argument("--port", type=int, default=8765, help="HTTP port (http mode)")
+    args = parser.parse_args()
+
+    server = KeepGPUServer()
+    if args.mode == "stdio":
+        run_stdio(server)
+    else:
+        run_http(server, host=args.host, port=args.port)
+
+
 if __name__ == "__main__":
     main()

From 35517352e6a7f4534a991a6573b8cfc361bb069b Mon Sep 17 00:00:00 2001
From: Wang Siyuan <wsy0227@sjtu.edu.cn>
Date: Tue, 9 Dec 2025 15:10:04 +0800
Subject: [PATCH 2/8] docs: add remote MCP http examples

---
 README.md               | 13 +++++++++++++
 docs/contributing.md    |  3 +++
 docs/getting-started.md | 20 ++++++++++++++++++++
 3 files changed, 36 insertions(+)

diff --git a/README.md b/README.md
index e7caa80..24efe35 100644
--- a/README.md
+++ b/README.md
@@ -119,6 +119,19 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=90, busy
       url: http://127.0.0.1:8765/
       adapter: http
   ```
+- Remote/SSH tunnel example (HTTP):
+  ```bash
+  keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765
+  ```
+  Client config (replace hostname/tunnel as needed):
+  ```yaml
+  servers:
+    keepgpu:
+      url: http://gpu-box.example.com:8765/
+      adapter: http
+  ```
+  For untrusted networks, put the server behind your own auth/reverse-proxy or
+  tunnel over SSH (for example, `ssh -L 8765:localhost:8765 gpu-box`).
 
 ## Contributing
 
diff --git a/docs/contributing.md b/docs/contributing.md
index e02de44..9d3f68f 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -48,6 +48,9 @@ expectations so you can get productive quickly and avoid surprises in CI.
   ```json
   {"id":1,"method":"start_keep","params":{"gpu_ids":[0],"vram":"512MB","interval":60,"busy_threshold":20}}
   ```
+- Remote tip: for shared clusters, prefer HTTP behind your own auth/reverse-proxy
+  or tunnel with SSH (`ssh -L 8765:localhost:8765 gpu-box`), then point your MCP
+  client at `http://127.0.0.1:8765/`.
 
 ## Pull requests
 
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 8ff6ae9..1d12463 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -87,6 +87,26 @@ keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765
 
 And point your MCP client at `http://127.0.0.1:8765/` (JSON-RPC 2.0 over POST).
 
+### Remote/cluster usage
+
+- Start on the GPU host:
+  ```bash
+  keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765
+  ```
+- Point your agent at the host:
+  ```yaml
+  servers:
+    keepgpu:
+      url: http://gpu-box.example.com:8765/
+      adapter: http
+  ```
+- If the host is not on a trusted network, tunnel instead of exposing the port:
+  ```bash
+  ssh -L 8765:localhost:8765 gpu-box.example.com
+  ```
+  Then use `http://127.0.0.1:8765/` in your MCP config. For multi-user clusters,
+  consider fronting the service with your own auth/reverse-proxy.
+
 === "Editable dev install"
     ```bash
     git clone https://github.com/Wangmerlyn/KeepGPU.git

From 9e4b35b343336af6014e6e527060ec0563eefd27 Mon Sep 17 00:00:00 2001
From: Wang Siyuan <sywang0227@gmail.com>
Date: Tue, 9 Dec 2025 15:17:02 +0800
Subject: [PATCH 3/8] Apply suggestions from code review

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 src/keep_gpu/mcp/server.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/keep_gpu/mcp/server.py b/src/keep_gpu/mcp/server.py
index f2ccb56..81ed71c 100644
--- a/src/keep_gpu/mcp/server.py
+++ b/src/keep_gpu/mcp/server.py
@@ -150,14 +150,14 @@ class _JSONRPCHandler(BaseHTTPRequestHandler):
     server_version = "KeepGPU-MCP/0.1"
 
     def do_POST(self):  # noqa: N802
-        length = int(self.headers.get("content-length", "0"))
-        body = self.rfile.read(length).decode()
         try:
+            length = int(self.headers.get("content-length", "0"))
+            body = self.rfile.read(length).decode("utf-8")
             payload = json.loads(body)
             response = _handle_request(self.server.keepgpu_server, payload)  # type: ignore[attr-defined]
             status = 200
-        except Exception as exc:  # pragma: no cover - defensive
-            response = {"error": {"message": str(exc)}}
+        except (json.JSONDecodeError, ValueError, UnicodeDecodeError) as exc:
+            response = {"error": {"message": f"Bad request: {exc}"}}
             status = 400
         data = json.dumps(response).encode()
         self.send_response(status)
@@ -194,7 +194,7 @@ class _Server(TCPServer):
     def _serve():
         httpd.serve_forever()
 
-    thread = threading.Thread(target=_serve, daemon=True)
+    thread = threading.Thread(target=_serve)
     thread.start()
     logger.info(
         "MCP HTTP server listening on http://%s:%s", host, httpd.server_address[1]

From a4a524e8d8d15cd6d76859397df4e15a1b9ebbf9 Mon Sep 17 00:00:00 2001
From: Wang Siyuan <wsy0227@sjtu.edu.cn>
Date: Tue, 9 Dec 2025 15:34:44 +0800
Subject: [PATCH 4/8] docs: reorganize usage docs and add mcp guide

---
 docs/getting-started.md | 89 ++++++++---------------------------------
 docs/guides/mcp.md      | 85 +++++++++++++++++++++++++++++++++++++++
 docs/index.md           |  4 +-
 mkdocs.yml              |  3 +-
 4 files changed, 107 insertions(+), 74 deletions(-)
 create mode 100644 docs/guides/mcp.md

diff --git a/docs/getting-started.md b/docs/getting-started.md
index 1d12463..1576e2a 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -9,9 +9,10 @@ understand the minimum knobs you need to keep a GPU occupied.
 - Python 3.9+ (matching the version in your environment/cluster image).
 - Optional but recommended: `nvidia-smi` in `PATH` for utilization monitoring (CUDA) or `rocm-smi` if you install the `rocm` extra.
 
-!!! warning "ROCm & multi-tenant clusters"
-    The current release focuses on CUDA devices. ROCm/AMD support is experimental;
-    controllers will raise `NotImplementedError` if CUDA is unavailable.
+!!! info "Platforms"
+    CUDA is the primary path; ROCm is supported through the `rocm` extra
+    (requires a ROCm-enabled PyTorch build). CPU-only environments can import
+    the package but controllers will not start.
 
 ## Install
 
@@ -39,74 +40,6 @@ understand the minimum knobs you need to keep a GPU occupied.
     pip install keep-gpu
     ```
 
-## For contributors
-
-- Install dev extras: `pip install -e ".[dev]"` (append `.[rocm]` if you need ROCm SMI).
-- Fast CUDA checks: `pytest tests/cuda_controller tests/global_controller tests/utilities/test_platform_manager.py tests/test_cli_thresholds.py`
-- ROCm-only tests are marked `rocm`; run with `pytest --run-rocm tests/rocm_controller`.
-
-## MCP endpoint (experimental)
-
-For automation clients that speak JSON-RPC (MCP-style), KeepGPU ships a tiny
-stdin/stdout server:
-
-```bash
-keep-gpu-mcp-server
-# each request is a single JSON line; example:
-echo '{"id":1,"method":"start_keep","params":{"gpu_ids":[0],"vram":"512MB","interval":60,"busy_threshold":20}}' | keep-gpu-mcp-server
-```
-
-Supported methods:
-- `start_keep(gpu_ids?, vram?, interval?, busy_threshold?, job_id?)`
-- `status(job_id?)`
-- `stop_keep(job_id?)` (no job_id stops all)
-- `list_gpus()` (basic info)
-
-### Example MCP client config (stdio)
-
-If your agent expects an MCP server definition, a minimal stdio config looks like:
-
-```yaml
-servers:
-  keepgpu:
-    description: "KeepGPU MCP server"
-    command: ["keep-gpu-mcp-server"]
-    adapter: stdio
-```
-
-Tools exposed: `start_keep`, `stop_keep`, `status`, `list_gpus`. Each request is
-a single JSON line; see above for an example payload.
-
-### HTTP transport
-
-Prefer TCP instead of stdio? Run:
-
-```bash
-keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765
-```
-
-And point your MCP client at `http://127.0.0.1:8765/` (JSON-RPC 2.0 over POST).
-
-### Remote/cluster usage
-
-- Start on the GPU host:
-  ```bash
-  keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765
-  ```
-- Point your agent at the host:
-  ```yaml
-  servers:
-    keepgpu:
-      url: http://gpu-box.example.com:8765/
-      adapter: http
-  ```
-- If the host is not on a trusted network, tunnel instead of exposing the port:
-  ```bash
-  ssh -L 8765:localhost:8765 gpu-box.example.com
-  ```
-  Then use `http://127.0.0.1:8765/` in your MCP config. For multi-user clusters,
-  consider fronting the service with your own auth/reverse-proxy.
-
 === "Editable dev install"
     ```bash
     git clone https://github.com/Wangmerlyn/KeepGPU.git
@@ -114,6 +47,12 @@ And point your MCP client at `http://127.0.0.1:8765/` (JSON-RPC 2.0 over POST).
     pip install -e .[dev]
     ```
 
+## Pick your interface
+
+- **CLI** – fastest way to reserve GPUs from a shell; see [CLI Playbook](guides/cli.md).
+- **Python module** – embed keep-alive loops inside orchestration code; see [Python API Recipes](guides/python.md).
+- **MCP server** – expose KeepGPU over JSON-RPC (stdio or HTTP) for agents; see [MCP Server](guides/mcp.md).
+
 ## Sanity check
 
 1. Make sure PyTorch can see at least one device:
@@ -149,7 +88,8 @@ ready to hand the GPU back, hit `Ctrl+C`—controllers will release VRAM and exi
 
 ## KeepGPU inside Python
 
-The CLI wraps the same controllers you can import directly:
+Prefer code-level control? Import the controllers directly (full recipes in
+[Python API Recipes](guides/python.md)):
 
 ```python
 from keep_gpu.single_gpu_controller.cuda_gpu_controller import CudaGPUController
@@ -171,3 +111,8 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=60):
 
 From here, jump to the CLI Playbook for scenario-driven guidance or the API
 recipes if you need to embed KeepGPU in orchestration scripts.
+
+## For contributors
+
+Developing locally? See [Contributing](contributing.md) for dev install, test
+commands (including CUDA/ROCm markers), and PR tips.
diff --git a/docs/guides/mcp.md b/docs/guides/mcp.md
new file mode 100644
index 0000000..ba05b68
--- /dev/null
+++ b/docs/guides/mcp.md
@@ -0,0 +1,85 @@
+# MCP Server
+
+Expose KeepGPU as a minimal JSON-RPC server (MCP-style) so agents or remote
+orchestrators can start/stop keep-alive jobs and inspect GPU state.
+
+## When to use this
+
+- You run KeepGPU from an agent (LangChain, custom orchestrator, etc.) instead of a shell.
+- You want to keep GPUs alive on a remote box over TCP rather than stdio.
+- You need a quick way to list GPU utilization/memory via the same interface.
+
+## Quick start
+
+=== "stdio (default)"
+    ```bash
+    keep-gpu-mcp-server
+    ```
+    Send one JSON request per line:
+    ```bash
+    echo '{"id":1,"method":"start_keep","params":{"gpu_ids":[0],"vram":"512MB","interval":60,"busy_threshold":20}}' | keep-gpu-mcp-server
+    ```
+
+=== "HTTP"
+    ```bash
+    keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765
+    curl -X POST http://127.0.0.1:8765/ \
+      -H "content-type: application/json" \
+      -d '{"id":1,"method":"status"}'
+    ```
+
+Supported methods:
+
+- `start_keep(gpu_ids?, vram?, interval?, busy_threshold?, job_id?)`
+- `stop_keep(job_id?)` (omit `job_id` to stop all)
+- `status(job_id?)` (omit `job_id` to list active jobs)
+- `list_gpus()` (detailed info via NVML/ROCm SMI/torch)
+
+## Client configs (MCP-style)
+
+=== "stdio adapter"
+    ```yaml
+    servers:
+      keepgpu:
+        description: "KeepGPU MCP server"
+        command: ["keep-gpu-mcp-server"]
+        adapter: stdio
+    ```
+
+=== "HTTP adapter"
+    ```yaml
+    servers:
+      keepgpu:
+        url: http://127.0.0.1:8765/
+        adapter: http
+    ```
+
+## Remote/cluster usage
+
+- Run on the GPU host:
+  ```bash
+  keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765
+  ```
+- Point your client at the host:
+  ```yaml
+  servers:
+    keepgpu:
+      url: http://gpu-box.example.com:8765/
+      adapter: http
+  ```
+- If the network is untrusted, tunnel instead of exposing the port:
+  ```bash
+  ssh -L 8765:localhost:8765 gpu-box.example.com
+  ```
+  Then use `http://127.0.0.1:8765/` in your MCP config. For multi-user clusters,
+  consider fronting the service with your own auth/reverse-proxy.
+
+## Responses you can expect
+
+```json
+{"id":1,"result":{"job_id":"<uuid>"}}                # start_keep
+{"id":2,"result":{"stopped":["<uuid>"]}}            # stop_keep
+{"id":3,"result":{"active":true,"job_id":"<uuid>","params":{"gpu_ids":[0]}}}
+{"id":4,"result":{"active_jobs":[{"job_id":"<uuid>","params":{"gpu_ids":[0]}}]}}
+{"id":5,"result":{"gpus":[{"id":0,"platform":"cuda","name":"A100","memory_total":...,"memory_used":...,"utilization":12}]}}
+```
diff --git a/docs/index.md b/docs/index.md
index 36bde1a..57938fb 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -32,11 +32,13 @@ during longer CPU-bound sections of your workflow.
   for pinning cards on clusters, workstations, or Jupyter.
 - :material-code-tags: **[Python API Recipes](guides/python.md)** – Drop-in snippets
   for wrapping preprocessing stages or orchestration scripts.
+- :material-lan: **[MCP Server](guides/mcp.md)** – Expose KeepGPU over JSON-RPC
+  (stdio/HTTP) for agents and remote orchestration.
 - :material-diagram-project: **[How KeepGPU Works](concepts/architecture.md)** –
   Learn how controllers allocate VRAM and throttle themselves.
 - :material-book-open-outline: **[Reference](reference/cli.md)** – Full option list
   plus mkdocstrings API reference.
 
 !!! tip "Prefer a fast skim?"
-    The left sidebar mirrors the lifecycle: overview → guides → concepts →
+    The left sidebar mirrors the lifecycle: overview → usage → concepts →
     references. Jump straight to what you need; sections stand on their own.
diff --git a/mkdocs.yml b/mkdocs.yml
index d26c9b8..532338f 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -15,9 +15,10 @@ nav:
   - Overview:
       - Welcome: index.md
       - Getting Started: getting-started.md
-  - Guides:
+  - Usage:
       - CLI Playbook: guides/cli.md
       - Python API Recipes: guides/python.md
+      - MCP Server: guides/mcp.md
   - Concepts:
       - How KeepGPU Works: concepts/architecture.md
   - Reference:

From 88d56c8aa434777d24233dddf6d348bfa20e1fdd Mon Sep 17 00:00:00 2001
From: "coderabbitai[bot]"
 <136622811+coderabbitai[bot]@users.noreply.github.com>
Date: Tue, 9 Dec 2025 16:56:19 +0800
Subject: [PATCH 5/8] [docs] chore: add docstrings to `mcp-http-transport`
 (#61)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 📝 Add docstrings to `mcp-http-transport`

Docstrings generation was requested by @Wangmerlyn.

* https://github.com/Wangmerlyn/KeepGPU/pull/60#issuecomment-3630698395

The following files were modified:

* `src/keep_gpu/mcp/server.py`

* chore: format mcp server docstrings

* style: shorten mcp server docstrings for hooks

* docs: restore detailed mcp server docstrings

* docs: rephrase mcp guide for docstyle hook

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Co-authored-by: Wang Siyuan <wsy0227@sjtu.edu.cn>
---
 docs/guides/mcp.md         |  4 +--
 src/keep_gpu/mcp/server.py | 56 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/docs/guides/mcp.md b/docs/guides/mcp.md
index ba05b68..f52d047 100644
--- a/docs/guides/mcp.md
+++ b/docs/guides/mcp.md
@@ -7,7 +7,7 @@ orchestrators can start/stop keep-alive jobs and inspect GPU state.
 
 - You run KeepGPU from an agent (LangChain, custom orchestrator, etc.) instead of a shell.
 - You want to keep GPUs alive on a remote box over TCP rather than stdio.
-- You need a quick way to list GPU utilization/memory via the same interface.
+- You need a quick way to list GPU utilization/memory by way of the same interface.
 
 ## Quick start
 
@@ -33,7 +33,7 @@ Supported methods:
 - `start_keep(gpu_ids?, vram?, interval?, busy_threshold?, job_id?)`
 - `stop_keep(job_id?)` (omit `job_id` to stop all)
 - `status(job_id?)` (omit `job_id` to list active jobs)
-- `list_gpus()` (detailed info via NVML/ROCm SMI/torch)
+- `list_gpus()` (detailed info by way of NVML/ROCm SMI/torch)
 
 ## Client configs (MCP-style)
 
diff --git a/src/keep_gpu/mcp/server.py b/src/keep_gpu/mcp/server.py
index 81ed71c..e4d255a 100644
--- a/src/keep_gpu/mcp/server.py
+++ b/src/keep_gpu/mcp/server.py
@@ -52,6 +52,22 @@ def start_keep(
         busy_threshold: int = -1,
         job_id: Optional[str] = None,
     ) -> Dict[str, Any]:
+        """
+        Start a KeepGPU session that reserves VRAM on one or more GPUs.
+
+        Args:
+            gpu_ids: GPU indices to target; None uses all available GPUs.
+            vram: Human-readable VRAM size to keep (for example, "1GiB").
+            interval: Seconds between controller checks/actions.
+            busy_threshold: Utilization above which the controller backs off.
+            job_id: Optional session identifier; a UUID is generated if omitted.
+
+        Returns:
+            Dict with the started session's job_id, e.g. ``{"job_id": "<id>"}``.
+
+        Raises:
+            ValueError: If the provided job_id already exists.
+        """
         job_id = job_id or str(uuid.uuid4())
         if job_id in self._sessions:
             raise ValueError(f"job_id {job_id} already exists")
@@ -78,6 +94,20 @@ def start_keep(
     def stop_keep(
         self, job_id: Optional[str] = None, quiet: bool = False
     ) -> Dict[str, Any]:
+        """
+        Stop one or all active keep sessions.
+
+        If job_id is supplied, only that session is stopped; otherwise all active
+        sessions are released. When quiet=True, informational logging is skipped.
+
+        Args:
+            job_id: Session identifier to stop; None stops every session.
+            quiet: Suppress informational logs about stopped sessions.
+
+        Returns:
+            Dict with a "stopped" list of job ids. If a specific job_id was not
+            found, a "message" field explains the miss.
+        """
         if job_id:
             session = self._sessions.pop(job_id, None)
             if session:
@@ -118,6 +148,7 @@ def list_gpus(self) -> Dict[str, Any]:
         return {"gpus": infos}
 
     def shutdown(self) -> None:
+        """Stop all sessions quietly; ignore errors during interpreter teardown."""
         try:
             self.stop_keep(None, quiet=True)
         except Exception:  # pragma: no cover - defensive
@@ -126,6 +157,16 @@ def shutdown(self) -> None:
 
 
 def _handle_request(server: KeepGPUServer, payload: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Dispatch a JSON-RPC payload to the server and return a response dict.
+
+    Args:
+        server: Target KeepGPUServer.
+        payload: Dict with "method", optional "params", and optional "id".
+
+    Returns:
+        JSON-RPC-style dict containing either "result" or "error" plus "id".
+    """
     method = payload.get("method")
     params = payload.get("params", {}) or {}
     req_id = payload.get("id")
@@ -150,11 +191,18 @@ class _JSONRPCHandler(BaseHTTPRequestHandler):
     server_version = "KeepGPU-MCP/0.1"
 
     def do_POST(self):  # noqa: N802
+        """
+        Handle an HTTP JSON-RPC request and write a JSON response.
+
+        Expects application/json bodies containing {"method", "params", "id"}.
+        Returns 400 with an error object if parsing fails.
+        """
         try:
             length = int(self.headers.get("content-length", "0"))
             body = self.rfile.read(length).decode("utf-8")
             payload = json.loads(body)
-            response = _handle_request(self.server.keepgpu_server, payload)  # type: ignore[attr-defined]
+            server_ref = self.server.keepgpu_server  # type: ignore[attr-defined]
+            response = _handle_request(server_ref, payload)
             status = 200
         except (json.JSONDecodeError, ValueError, UnicodeDecodeError) as exc:
             response = {"error": {"message": f"Bad request: {exc}"}}
@@ -167,10 +215,12 @@ def do_POST(self):  # noqa: N802
         self.wfile.write(data)
 
     def log_message(self, format, *args):  # noqa: A003
+        """Suppress default request logging."""
         return
 
 
 def run_stdio(server: KeepGPUServer) -> None:
+    """Serve JSON-RPC requests over stdin/stdout (one JSON object per line)."""
     for line in sys.stdin:
         line = line.strip()
         if not line:
@@ -185,6 +235,8 @@ def run_stdio(server: KeepGPUServer) -> None:
 
 
 def run_http(server: KeepGPUServer, host: str = "127.0.0.1", port: int = 8765) -> None:
+    """Run a lightweight HTTP JSON-RPC server on the given host/port."""
+
     class _Server(TCPServer):
         allow_reuse_address = True
 
@@ -192,6 +244,7 @@ class _Server(TCPServer):
     httpd.keepgpu_server = server  # type: ignore[attr-defined]
 
     def _serve():
+        """Run the HTTP server loop until shutdown."""
         httpd.serve_forever()
 
     thread = threading.Thread(target=_serve)
@@ -210,6 +263,7 @@ def _serve():
 
 
 def main() -> None:
+    """CLI entry point for the KeepGPU MCP server."""
     parser = argparse.ArgumentParser(description="KeepGPU MCP server")
     parser.add_argument(
         "--mode",

From 832f7c8007b9b33a623f06aaa61e8652f8e13ca8 Mon Sep 17 00:00:00 2001
From: Wang Siyuan <wsy0227@sjtu.edu.cn>
Date: Tue, 9 Dec 2025 17:03:24 +0800
Subject: [PATCH 6/8] docs: fix mcp quick-start tabs rendering

---
 docs/guides/mcp.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/guides/mcp.md b/docs/guides/mcp.md
index f52d047..f8b437c 100644
--- a/docs/guides/mcp.md
+++ b/docs/guides/mcp.md
@@ -15,7 +15,9 @@ orchestrators can start/stop keep-alive jobs and inspect GPU state.
     ```bash
     keep-gpu-mcp-server
     ```
+
     Send one JSON request per line:
+
     ```bash
     echo '{"id":1,"method":"start_keep","params":{"gpu_ids":[0],"vram":"512MB","interval":60,"busy_threshold":20}}' | keep-gpu-mcp-server
     ```
@@ -23,6 +25,11 @@ orchestrators can start/stop keep-alive jobs and inspect GPU state.
 === "HTTP"
     ```bash
     keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765
+    ```
+
+    Query it over HTTP:
+
+    ```bash
     curl -X POST http://127.0.0.1:8765/ \
       -H "content-type: application/json" \
       -d '{"id":1,"method":"status"}'

From 38fdd4946e586421b3cc5343d3ec0ceff1fce602 Mon Sep 17 00:00:00 2001
From: Wang Siyuan <wsy0227@sjtu.edu.cn>
Date: Tue, 9 Dec 2025 17:17:13 +0800
Subject: [PATCH 7/8] docs: add mkdocs dev commands and link contributing

---
 README.md            |  2 +-
 docs/contributing.md | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 24efe35..3dcceb7 100644
--- a/README.md
+++ b/README.md
@@ -136,7 +136,7 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=90, busy
 ## Contributing
 
 Contributions are welcome—especially around ROCm support, platform fallbacks, and scheduler-specific recipes. Open an issue or PR if you hit edge cases on your cluster.
-See `docs/contributing.md` for dev setup, test commands, and PR tips.
+See [docs/contributing.md](docs/contributing.md) for dev setup, test commands, and PR tips.
 
 ## Credits
 
diff --git a/docs/contributing.md b/docs/contributing.md
index 9d3f68f..03ff180 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -39,6 +39,17 @@ expectations so you can get productive quickly and avoid surprises in CI.
   pre-commit run --all-files
   ```
 
+## Docs
+
+- Live preview:
+  ```bash
+  mkdocs serve
+  ```
+- Build the static site:
+  ```bash
+  mkdocs build
+  ```
+
 ## MCP server (experimental)
 
 - Start: `keep-gpu-mcp-server` (stdin/stdout JSON-RPC)

From c84f3eb7043395bb389909616004a884e76c8275 Mon Sep 17 00:00:00 2001
From: Wang Siyuan <wsy0227@sjtu.edu.cn>
Date: Tue, 9 Dec 2025 17:28:02 +0800
Subject: [PATCH 8/8] docs: enable pymdownx tabbed for MCP quick-start

---
 mkdocs.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mkdocs.yml b/mkdocs.yml
index 532338f..210f6df 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -35,6 +35,8 @@ plugins:
 markdown_extensions:
   - admonition
   - codehilite
+  - pymdownx.tabbed:
+      alternate_style: true
   - toc:
       permalink: true
   - pymdownx.emoji: