From 3cdec9e15df77c07aa8852c99afd38019ac049ec Mon Sep 17 00:00:00 2001 From: Wang Siyuan Date: Tue, 9 Dec 2025 14:53:57 +0800 Subject: [PATCH 1/8] mcp: add http transport and quiet shutdown --- README.md | 13 +++++- docs/contributing.md | 1 + docs/getting-started.md | 10 +++++ src/keep_gpu/mcp/server.py | 89 +++++++++++++++++++++++++++++++++++--- 4 files changed, 105 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index a92cc77..e7caa80 100644 --- a/README.md +++ b/README.md @@ -92,10 +92,14 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=90, busy ### MCP endpoint (experimental) -- Start a simple JSON-RPC server on stdin/stdout: +- Start a simple JSON-RPC server on stdin/stdout (default): ```bash keep-gpu-mcp-server ``` +- Or expose it over HTTP (JSON-RPC 2.0 by way of POST): + ```bash + keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765 + ``` - Example request (one per line): ```json {"id": 1, "method": "start_keep", "params": {"gpu_ids": [0], "vram": "512MB", "interval": 60, "busy_threshold": 20}} @@ -108,6 +112,13 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=90, busy command: ["keep-gpu-mcp-server"] adapter: stdio ``` +- Minimal client config (HTTP MCP): + ```yaml + servers: + keepgpu: + url: http://127.0.0.1:8765/ + adapter: http + ``` ## Contributing diff --git a/docs/contributing.md b/docs/contributing.md index 9754a33..e02de44 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -42,6 +42,7 @@ expectations so you can get productive quickly and avoid surprises in CI. 
## MCP server (experimental) - Start: `keep-gpu-mcp-server` (stdin/stdout JSON-RPC) +- HTTP option: `keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765` - Methods: `start_keep`, `stop_keep`, `status`, `list_gpus` - Example request: ```json diff --git a/docs/getting-started.md b/docs/getting-started.md index 00b8f55..8ff6ae9 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -77,6 +77,16 @@ servers: Tools exposed: `start_keep`, `stop_keep`, `status`, `list_gpus`. Each request is a single JSON line; see above for an example payload. +### HTTP transport + +Prefer TCP instead of stdio? Run: + +```bash +keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765 +``` + +And point your MCP client at `http://127.0.0.1:8765/` (JSON-RPC 2.0 over POST). + === "Editable dev install" ```bash git clone https://github.com/Wangmerlyn/KeepGPU.git diff --git a/src/keep_gpu/mcp/server.py b/src/keep_gpu/mcp/server.py index 47a06f4..f2ccb56 100644 --- a/src/keep_gpu/mcp/server.py +++ b/src/keep_gpu/mcp/server.py @@ -1,11 +1,12 @@ """ Minimal MCP-style JSON-RPC server for KeepGPU. -The server reads JSON lines from stdin and writes JSON responses to stdout. +Run over stdin/stdout (default) or a lightweight HTTP server. 
Supported methods: - start_keep(gpu_ids, vram, interval, busy_threshold, job_id) - stop_keep(job_id=None) # None stops all - status(job_id=None) # None lists all + - list_gpus() """ from __future__ import annotations @@ -14,6 +15,10 @@ import json import sys import uuid +import argparse +import threading +from http.server import BaseHTTPRequestHandler +from socketserver import TCPServer from dataclasses import dataclass from typing import Any, Callable, Dict, List, Optional @@ -70,12 +75,15 @@ def start_keep( logger.info("Started keep session %s on GPUs %s", job_id, gpu_ids) return {"job_id": job_id} - def stop_keep(self, job_id: Optional[str] = None) -> Dict[str, Any]: + def stop_keep( + self, job_id: Optional[str] = None, quiet: bool = False + ) -> Dict[str, Any]: if job_id: session = self._sessions.pop(job_id, None) if session: session.controller.release() - logger.info("Stopped keep session %s", job_id) + if not quiet: + logger.info("Stopped keep session %s", job_id) return {"stopped": [job_id]} return {"stopped": [], "message": "job_id not found"} @@ -83,7 +91,7 @@ def stop_keep(self, job_id: Optional[str] = None) -> Dict[str, Any]: for job_id in stopped_ids: session = self._sessions.pop(job_id) session.controller.release() - if stopped_ids: + if stopped_ids and not quiet: logger.info("Stopped sessions: %s", stopped_ids) return {"stopped": stopped_ids} @@ -111,7 +119,7 @@ def list_gpus(self) -> Dict[str, Any]: def shutdown(self) -> None: try: - self.stop_keep(None) + self.stop_keep(None, quiet=True) except Exception: # pragma: no cover - defensive # Avoid noisy errors during interpreter teardown return @@ -138,8 +146,31 @@ def _handle_request(server: KeepGPUServer, payload: Dict[str, Any]) -> Dict[str, return {"id": req_id, "error": {"message": str(exc)}} -def main() -> None: - server = KeepGPUServer() +class _JSONRPCHandler(BaseHTTPRequestHandler): + server_version = "KeepGPU-MCP/0.1" + + def do_POST(self): # noqa: N802 + length = 
int(self.headers.get("content-length", "0")) + body = self.rfile.read(length).decode() + try: + payload = json.loads(body) + response = _handle_request(self.server.keepgpu_server, payload) # type: ignore[attr-defined] + status = 200 + except Exception as exc: # pragma: no cover - defensive + response = {"error": {"message": str(exc)}} + status = 400 + data = json.dumps(response).encode() + self.send_response(status) + self.send_header("content-type", "application/json") + self.send_header("content-length", str(len(data))) + self.end_headers() + self.wfile.write(data) + + def log_message(self, format, *args): # noqa: A003 + return + + +def run_stdio(server: KeepGPUServer) -> None: for line in sys.stdin: line = line.strip() if not line: @@ -153,5 +184,49 @@ def main() -> None: sys.stdout.flush() +def run_http(server: KeepGPUServer, host: str = "127.0.0.1", port: int = 8765) -> None: + class _Server(TCPServer): + allow_reuse_address = True + + httpd = _Server((host, port), _JSONRPCHandler) + httpd.keepgpu_server = server # type: ignore[attr-defined] + + def _serve(): + httpd.serve_forever() + + thread = threading.Thread(target=_serve, daemon=True) + thread.start() + logger.info( + "MCP HTTP server listening on http://%s:%s", host, httpd.server_address[1] + ) + try: + thread.join() + except KeyboardInterrupt: + pass + finally: + httpd.shutdown() + httpd.server_close() + server.shutdown() + + +def main() -> None: + parser = argparse.ArgumentParser(description="KeepGPU MCP server") + parser.add_argument( + "--mode", + choices=["stdio", "http"], + default="stdio", + help="Transport mode (default: stdio)", + ) + parser.add_argument("--host", default="127.0.0.1", help="HTTP host (http mode)") + parser.add_argument("--port", type=int, default=8765, help="HTTP port (http mode)") + args = parser.parse_args() + + server = KeepGPUServer() + if args.mode == "stdio": + run_stdio(server) + else: + run_http(server, host=args.host, port=args.port) + + if __name__ == "__main__": 
main() From 35517352e6a7f4534a991a6573b8cfc361bb069b Mon Sep 17 00:00:00 2001 From: Wang Siyuan Date: Tue, 9 Dec 2025 15:10:04 +0800 Subject: [PATCH 2/8] docs: add remote MCP http examples --- README.md | 13 +++++++++++++ docs/contributing.md | 3 +++ docs/getting-started.md | 20 ++++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/README.md b/README.md index e7caa80..24efe35 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,19 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=90, busy url: http://127.0.0.1:8765/ adapter: http ``` +- Remote/SSH tunnel example (HTTP): + ```bash + keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765 + ``` + Client config (replace hostname/tunnel as needed): + ```yaml + servers: + keepgpu: + url: http://gpu-box.example.com:8765/ + adapter: http + ``` + For untrusted networks, put the server behind your own auth/reverse-proxy or + tunnel by way of SSH (for example, `ssh -L 8765:localhost:8765 gpu-box`). ## Contributing diff --git a/docs/contributing.md b/docs/contributing.md index e02de44..9d3f68f 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -48,6 +48,9 @@ expectations so you can get productive quickly and avoid surprises in CI. ```json {"id":1,"method":"start_keep","params":{"gpu_ids":[0],"vram":"512MB","interval":60,"busy_threshold":20}} ``` +- Remote tip: for shared clusters, prefer HTTP behind your own auth/reverse-proxy + or tunnel with SSH (`ssh -L 8765:localhost:8765 gpu-box`), then point your MCP + client at `http://127.0.0.1:8765/`. ## Pull requests diff --git a/docs/getting-started.md b/docs/getting-started.md index 8ff6ae9..1d12463 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -87,6 +87,26 @@ keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765 And point your MCP client at `http://127.0.0.1:8765/` (JSON-RPC 2.0 over POST). 
+### Remote/cluster usage + +- Start on the GPU host: + ```bash + keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765 + ``` +- Point your agent at the host: + ```yaml + servers: + keepgpu: + url: http://gpu-box.example.com:8765/ + adapter: http + ``` +- If the host is not on a trusted network, tunnel instead of exposing the port: + ```bash + ssh -L 8765:localhost:8765 gpu-box.example.com + ``` + Then use `http://127.0.0.1:8765/` in your MCP config. For multi-user clusters, + consider fronting the service with your own auth/reverse-proxy. + === "Editable dev install" ```bash git clone https://github.com/Wangmerlyn/KeepGPU.git From 9e4b35b343336af6014e6e527060ec0563eefd27 Mon Sep 17 00:00:00 2001 From: Wang Siyuan Date: Tue, 9 Dec 2025 15:17:02 +0800 Subject: [PATCH 3/8] Apply suggestions from code review Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/keep_gpu/mcp/server.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/keep_gpu/mcp/server.py b/src/keep_gpu/mcp/server.py index f2ccb56..81ed71c 100644 --- a/src/keep_gpu/mcp/server.py +++ b/src/keep_gpu/mcp/server.py @@ -150,14 +150,14 @@ class _JSONRPCHandler(BaseHTTPRequestHandler): server_version = "KeepGPU-MCP/0.1" def do_POST(self): # noqa: N802 - length = int(self.headers.get("content-length", "0")) - body = self.rfile.read(length).decode() try: + length = int(self.headers.get("content-length", "0")) + body = self.rfile.read(length).decode("utf-8") payload = json.loads(body) response = _handle_request(self.server.keepgpu_server, payload) # type: ignore[attr-defined] status = 200 - except Exception as exc: # pragma: no cover - defensive - response = {"error": {"message": str(exc)}} + except (json.JSONDecodeError, ValueError, UnicodeDecodeError) as exc: + response = {"error": {"message": f"Bad request: {exc}"}} status = 400 data = json.dumps(response).encode() self.send_response(status) @@ -194,7 +194,7 @@ class 
_Server(TCPServer): def _serve(): httpd.serve_forever() - thread = threading.Thread(target=_serve, daemon=True) + thread = threading.Thread(target=_serve) thread.start() logger.info( "MCP HTTP server listening on http://%s:%s", host, httpd.server_address[1] From a4a524e8d8d15cd6d76859397df4e15a1b9ebbf9 Mon Sep 17 00:00:00 2001 From: Wang Siyuan Date: Tue, 9 Dec 2025 15:34:44 +0800 Subject: [PATCH 4/8] docs: reorganize usage docs and add mcp guide --- docs/getting-started.md | 89 ++++++++--------------------------------- docs/guides/mcp.md | 85 +++++++++++++++++++++++++++++++++++++++ docs/index.md | 4 +- mkdocs.yml | 3 +- 4 files changed, 107 insertions(+), 74 deletions(-) create mode 100644 docs/guides/mcp.md diff --git a/docs/getting-started.md b/docs/getting-started.md index 1d12463..1576e2a 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -9,9 +9,10 @@ understand the minimum knobs you need to keep a GPU occupied. - Python 3.9+ (matching the version in your environment/cluster image). - Optional but recommended: `nvidia-smi` in `PATH` for utilization monitoring (CUDA) or `rocm-smi` if you install the `rocm` extra. -!!! warning "ROCm & multi-tenant clusters" - The current release focuses on CUDA devices. ROCm/AMD support is experimental; - controllers will raise `NotImplementedError` if CUDA is unavailable. +!!! info "Platforms" + CUDA is the primary path; ROCm is supported by way of the `rocm` extra + (requires a ROCm-enabled PyTorch build). CPU-only environments can import + the package but controllers will not start. ## Install @@ -39,74 +40,6 @@ understand the minimum knobs you need to keep a GPU occupied. pip install keep-gpu ``` -## For contributors - -- Install dev extras: `pip install -e ".[dev]"` (append `.[rocm]` if you need ROCm SMI). 
-- Fast CUDA checks: `pytest tests/cuda_controller tests/global_controller tests/utilities/test_platform_manager.py tests/test_cli_thresholds.py` -- ROCm-only tests are marked `rocm`; run with `pytest --run-rocm tests/rocm_controller`. - -## MCP endpoint (experimental) - -For automation clients that speak JSON-RPC (MCP-style), KeepGPU ships a tiny -stdin/stdout server: - -```bash -keep-gpu-mcp-server -# each request is a single JSON line; example: -echo '{"id":1,"method":"start_keep","params":{"gpu_ids":[0],"vram":"512MB","interval":60,"busy_threshold":20}}' | keep-gpu-mcp-server -``` - -Supported methods: -- `start_keep(gpu_ids?, vram?, interval?, busy_threshold?, job_id?)` -- `status(job_id?)` -- `stop_keep(job_id?)` (no job_id stops all) -- `list_gpus()` (basic info) - -### Example MCP client config (stdio) - -If your agent expects an MCP server definition, a minimal stdio config looks like: - -```yaml -servers: - keepgpu: - description: "KeepGPU MCP server" - command: ["keep-gpu-mcp-server"] - adapter: stdio -``` - -Tools exposed: `start_keep`, `stop_keep`, `status`, `list_gpus`. Each request is -a single JSON line; see above for an example payload. - -### HTTP transport - -Prefer TCP instead of stdio? Run: - -```bash -keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765 -``` - -And point your MCP client at `http://127.0.0.1:8765/` (JSON-RPC 2.0 over POST). - -### Remote/cluster usage - -- Start on the GPU host: - ```bash - keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765 - ``` -- Point your agent at the host: - ```yaml - servers: - keepgpu: - url: http://gpu-box.example.com:8765/ - adapter: http - ``` -- If the host is not on a trusted network, tunnel instead of exposing the port: - ```bash - ssh -L 8765:localhost:8765 gpu-box.example.com - ``` - Then use `http://127.0.0.1:8765/` in your MCP config. For multi-user clusters, - consider fronting the service with your own auth/reverse-proxy. 
- === "Editable dev install" ```bash git clone https://github.com/Wangmerlyn/KeepGPU.git @@ -114,6 +47,12 @@ And point your MCP client at `http://127.0.0.1:8765/` (JSON-RPC 2.0 over POST). pip install -e .[dev] ``` +## Pick your interface + +- **CLI** – fastest way to reserve GPUs from a shell; see [CLI Playbook](guides/cli.md). +- **Python module** – embed keep-alive loops inside orchestration code; see [Python API Recipes](guides/python.md). +- **MCP server** – expose KeepGPU over JSON-RPC (stdio or HTTP) for agents; see [MCP Server](guides/mcp.md). + ## Sanity check 1. Make sure PyTorch can see at least one device: @@ -149,7 +88,8 @@ ready to hand the GPU back, hit `Ctrl+C`—controllers will release VRAM and exi ## KeepGPU inside Python -The CLI wraps the same controllers you can import directly: +Prefer code-level control? Import the controllers directly (full recipes in +[Python API Recipes](guides/python.md)): ```python from keep_gpu.single_gpu_controller.cuda_gpu_controller import CudaGPUController @@ -171,3 +111,8 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=60): From here, jump to the CLI Playbook for scenario-driven guidance or the API recipes if you need to embed KeepGPU in orchestration scripts. + +## For contributors + +Developing locally? See [Contributing](contributing.md) for dev install, test +commands (including CUDA/ROCm markers), and PR tips. diff --git a/docs/guides/mcp.md b/docs/guides/mcp.md new file mode 100644 index 0000000..ba05b68 --- /dev/null +++ b/docs/guides/mcp.md @@ -0,0 +1,85 @@ +# MCP Server + +Expose KeepGPU as a minimal JSON-RPC server (MCP-style) so agents or remote +orchestrators can start/stop keep-alive jobs and inspect GPU state. + +## When to use this + +- You run KeepGPU from an agent (LangChain, custom orchestrator, etc.) instead of a shell. +- You want to keep GPUs alive on a remote box over TCP rather than stdio. +- You need a quick way to list GPU utilization/memory via the same interface. 
+ +## Quick start + +=== "stdio (default)" + ```bash + keep-gpu-mcp-server + ``` + Send one JSON request per line: + ```bash + echo '{"id":1,"method":"start_keep","params":{"gpu_ids":[0],"vram":"512MB","interval":60,"busy_threshold":20}}' | keep-gpu-mcp-server + ``` + +=== "HTTP" + ```bash + keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765 + curl -X POST http://127.0.0.1:8765/ \ + -H "content-type: application/json" \ + -d '{"id":1,"method":"status"}' + ``` + +Supported methods: + +- `start_keep(gpu_ids?, vram?, interval?, busy_threshold?, job_id?)` +- `stop_keep(job_id?)` (omit `job_id` to stop all) +- `status(job_id?)` (omit `job_id` to list active jobs) +- `list_gpus()` (detailed info via NVML/ROCm SMI/torch) + +## Client configs (MCP-style) + +=== "stdio adapter" + ```yaml + servers: + keepgpu: + description: "KeepGPU MCP server" + command: ["keep-gpu-mcp-server"] + adapter: stdio + ``` + +=== "HTTP adapter" + ```yaml + servers: + keepgpu: + url: http://127.0.0.1:8765/ + adapter: http + ``` + +## Remote/cluster usage + +- Run on the GPU host: + ```bash + keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765 + ``` +- Point your client at the host: + ```yaml + servers: + keepgpu: + url: http://gpu-box.example.com:8765/ + adapter: http + ``` +- If the network is untrusted, tunnel instead of exposing the port: + ```bash + ssh -L 8765:localhost:8765 gpu-box.example.com + ``` + Then use `http://127.0.0.1:8765/` in your MCP config. For multi-user clusters, + consider fronting the service with your own auth/reverse-proxy. 
+ +## Responses you can expect + +```json +{"id":1,"result":{"job_id":"<job_id>"}} # start_keep +{"id":2,"result":{"stopped":["<job_id>"]}} # stop_keep +{"id":3,"result":{"active":true,"job_id":"<job_id>","params":{"gpu_ids":[0]}}} +{"id":4,"result":{"active_jobs":[{"job_id":"<job_id>","params":{"gpu_ids":[0]}}]}} +{"id":5,"result":{"gpus":[{"id":0,"platform":"cuda","name":"A100","memory_total":...,"memory_used":...,"utilization":12}]}} +``` diff --git a/docs/index.md b/docs/index.md index 36bde1a..57938fb 100644 --- a/docs/index.md +++ b/docs/index.md @@ -32,11 +32,13 @@ during longer CPU-bound sections of your workflow. for pinning cards on clusters, workstations, or Jupyter. - :material-code-tags: **[Python API Recipes](guides/python.md)** – Drop-in snippets for wrapping preprocessing stages or orchestration scripts. +- :material-lan: **[MCP Server](guides/mcp.md)** – Expose KeepGPU by way of JSON-RPC + (stdio/HTTP) for agents and remote orchestration. - :material-diagram-project: **[How KeepGPU Works](concepts/architecture.md)** – Learn how controllers allocate VRAM and throttle themselves. - :material-book-open-outline: **[Reference](reference/cli.md)** – Full option list plus mkdocstrings API reference. !!! tip "Prefer a fast skim?" - The left sidebar mirrors the lifecycle: overview → guides → concepts → + The left sidebar mirrors the lifecycle: overview → usage → concepts → references. Jump straight to what you need; sections stand on their own. 
diff --git a/mkdocs.yml b/mkdocs.yml index d26c9b8..532338f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -15,9 +15,10 @@ nav: - Overview: - Welcome: index.md - Getting Started: getting-started.md - - Guides: + - Usage: - CLI Playbook: guides/cli.md - Python API Recipes: guides/python.md + - MCP Server: guides/mcp.md - Concepts: - How KeepGPU Works: concepts/architecture.md - Reference: From 88d56c8aa434777d24233dddf6d348bfa20e1fdd Mon Sep 17 00:00:00 2001 From: "coderabbitai[bot]" <136622811+coderabbitai[bot]@users.noreply.github.com> Date: Tue, 9 Dec 2025 16:56:19 +0800 Subject: [PATCH 5/8] [docs] chore: add docstrings to `mcp-http-transport` (#61) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📝 Add docstrings to `mcp-http-transport` Docstrings generation was requested by @Wangmerlyn. * https://github.com/Wangmerlyn/KeepGPU/pull/60#issuecomment-3630698395 The following files were modified: * `src/keep_gpu/mcp/server.py` * chore: format mcp server docstrings * style: shorten mcp server docstrings for hooks * docs: restore detailed mcp server docstrings * docs: rephrase mcp guide for docstyle hook --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Co-authored-by: Wang Siyuan --- docs/guides/mcp.md | 4 +-- src/keep_gpu/mcp/server.py | 56 +++++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/docs/guides/mcp.md b/docs/guides/mcp.md index ba05b68..f52d047 100644 --- a/docs/guides/mcp.md +++ b/docs/guides/mcp.md @@ -7,7 +7,7 @@ orchestrators can start/stop keep-alive jobs and inspect GPU state. - You run KeepGPU from an agent (LangChain, custom orchestrator, etc.) instead of a shell. - You want to keep GPUs alive on a remote box over TCP rather than stdio. -- You need a quick way to list GPU utilization/memory via the same interface. +- You need a quick way to list GPU utilization/memory by way of the same interface. 
## Quick start @@ -33,7 +33,7 @@ Supported methods: - `start_keep(gpu_ids?, vram?, interval?, busy_threshold?, job_id?)` - `stop_keep(job_id?)` (omit `job_id` to stop all) - `status(job_id?)` (omit `job_id` to list active jobs) -- `list_gpus()` (detailed info via NVML/ROCm SMI/torch) +- `list_gpus()` (detailed info by way of NVML/ROCm SMI/torch) ## Client configs (MCP-style) diff --git a/src/keep_gpu/mcp/server.py b/src/keep_gpu/mcp/server.py index 81ed71c..e4d255a 100644 --- a/src/keep_gpu/mcp/server.py +++ b/src/keep_gpu/mcp/server.py @@ -52,6 +52,22 @@ def start_keep( busy_threshold: int = -1, job_id: Optional[str] = None, ) -> Dict[str, Any]: + """ + Start a KeepGPU session that reserves VRAM on one or more GPUs. + + Args: + gpu_ids: GPU indices to target; None uses all available GPUs. + vram: Human-readable VRAM size to keep (for example, "1GiB"). + interval: Seconds between controller checks/actions. + busy_threshold: Utilization above which the controller backs off. + job_id: Optional session identifier; a UUID is generated if omitted. + + Returns: + Dict with the started session's job_id, e.g. ``{"job_id": ""}``. + + Raises: + ValueError: If the provided job_id already exists. + """ job_id = job_id or str(uuid.uuid4()) if job_id in self._sessions: raise ValueError(f"job_id {job_id} already exists") @@ -78,6 +94,20 @@ def start_keep( def stop_keep( self, job_id: Optional[str] = None, quiet: bool = False ) -> Dict[str, Any]: + """ + Stop one or all active keep sessions. + + If job_id is supplied, only that session is stopped; otherwise all active + sessions are released. When quiet=True, informational logging is skipped. + + Args: + job_id: Session identifier to stop; None stops every session. + quiet: Suppress informational logs about stopped sessions. + + Returns: + Dict with a "stopped" list of job ids. If a specific job_id was not + found, a "message" field explains the miss. 
+ """ if job_id: session = self._sessions.pop(job_id, None) if session: @@ -118,6 +148,7 @@ def list_gpus(self) -> Dict[str, Any]: return {"gpus": infos} def shutdown(self) -> None: + """Stop all sessions quietly; ignore errors during interpreter teardown.""" try: self.stop_keep(None, quiet=True) except Exception: # pragma: no cover - defensive @@ -126,6 +157,16 @@ def shutdown(self) -> None: def _handle_request(server: KeepGPUServer, payload: Dict[str, Any]) -> Dict[str, Any]: + """ + Dispatch a JSON-RPC payload to the server and return a response dict. + + Args: + server: Target KeepGPUServer. + payload: Dict with "method", optional "params", and optional "id". + + Returns: + JSON-RPC-style dict containing either "result" or "error" plus "id". + """ method = payload.get("method") params = payload.get("params", {}) or {} req_id = payload.get("id") @@ -150,11 +191,18 @@ class _JSONRPCHandler(BaseHTTPRequestHandler): server_version = "KeepGPU-MCP/0.1" def do_POST(self): # noqa: N802 + """ + Handle an HTTP JSON-RPC request and write a JSON response. + + Expects application/json bodies containing {"method", "params", "id"}. + Returns 400 with an error object if parsing fails. 
+ """ try: length = int(self.headers.get("content-length", "0")) body = self.rfile.read(length).decode("utf-8") payload = json.loads(body) - response = _handle_request(self.server.keepgpu_server, payload) # type: ignore[attr-defined] + server_ref = self.server.keepgpu_server # type: ignore[attr-defined] + response = _handle_request(server_ref, payload) status = 200 except (json.JSONDecodeError, ValueError, UnicodeDecodeError) as exc: response = {"error": {"message": f"Bad request: {exc}"}} @@ -167,10 +215,12 @@ def do_POST(self): # noqa: N802 self.wfile.write(data) def log_message(self, format, *args): # noqa: A003 + """Suppress default request logging.""" return def run_stdio(server: KeepGPUServer) -> None: + """Serve JSON-RPC requests over stdin/stdout (one JSON object per line).""" for line in sys.stdin: line = line.strip() if not line: @@ -185,6 +235,8 @@ def run_stdio(server: KeepGPUServer) -> None: def run_http(server: KeepGPUServer, host: str = "127.0.0.1", port: int = 8765) -> None: + """Run a lightweight HTTP JSON-RPC server on the given host/port.""" + class _Server(TCPServer): allow_reuse_address = True @@ -192,6 +244,7 @@ class _Server(TCPServer): httpd.keepgpu_server = server # type: ignore[attr-defined] def _serve(): + """Run the HTTP server loop until shutdown.""" httpd.serve_forever() thread = threading.Thread(target=_serve) @@ -210,6 +263,7 @@ def _serve(): def main() -> None: + """CLI entry point for the KeepGPU MCP server.""" parser = argparse.ArgumentParser(description="KeepGPU MCP server") parser.add_argument( "--mode", From 832f7c8007b9b33a623f06aaa61e8652f8e13ca8 Mon Sep 17 00:00:00 2001 From: Wang Siyuan Date: Tue, 9 Dec 2025 17:03:24 +0800 Subject: [PATCH 6/8] docs: fix mcp quick-start tabs rendering --- docs/guides/mcp.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/guides/mcp.md b/docs/guides/mcp.md index f52d047..f8b437c 100644 --- a/docs/guides/mcp.md +++ b/docs/guides/mcp.md @@ -15,7 +15,9 @@ orchestrators can 
start/stop keep-alive jobs and inspect GPU state. ```bash keep-gpu-mcp-server ``` + Send one JSON request per line: + ```bash echo '{"id":1,"method":"start_keep","params":{"gpu_ids":[0],"vram":"512MB","interval":60,"busy_threshold":20}}' | keep-gpu-mcp-server ``` @@ -23,6 +25,11 @@ orchestrators can start/stop keep-alive jobs and inspect GPU state. === "HTTP" ```bash keep-gpu-mcp-server --mode http --host 0.0.0.0 --port 8765 + ``` + + Query it over HTTP: + + ```bash curl -X POST http://127.0.0.1:8765/ \ -H "content-type: application/json" \ -d '{"id":1,"method":"status"}' From 38fdd4946e586421b3cc5343d3ec0ceff1fce602 Mon Sep 17 00:00:00 2001 From: Wang Siyuan Date: Tue, 9 Dec 2025 17:17:13 +0800 Subject: [PATCH 7/8] docs: add mkdocs dev commands and link contributing --- README.md | 2 +- docs/contributing.md | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 24efe35..3dcceb7 100644 --- a/README.md +++ b/README.md @@ -136,7 +136,7 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=90, busy ## Contributing Contributions are welcome—especially around ROCm support, platform fallbacks, and scheduler-specific recipes. Open an issue or PR if you hit edge cases on your cluster. -See `docs/contributing.md` for dev setup, test commands, and PR tips. +See [docs/contributing.md](docs/contributing.md) for dev setup, test commands, and PR tips. ## Credits diff --git a/docs/contributing.md b/docs/contributing.md index 9d3f68f..03ff180 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -39,6 +39,17 @@ expectations so you can get productive quickly and avoid surprises in CI. 
pre-commit run --all-files ``` +## Docs + +- Live preview: + ```bash + mkdocs serve + ``` +- Build the static site: + ```bash + mkdocs build + ``` + ## MCP server (experimental) - Start: `keep-gpu-mcp-server` (stdin/stdout JSON-RPC) From c84f3eb7043395bb389909616004a884e76c8275 Mon Sep 17 00:00:00 2001 From: Wang Siyuan Date: Tue, 9 Dec 2025 17:28:02 +0800 Subject: [PATCH 8/8] docs: enable pymdownx tabbed for MCP quick-start --- mkdocs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mkdocs.yml b/mkdocs.yml index 532338f..210f6df 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -35,6 +35,8 @@ plugins: markdown_extensions: - admonition - codehilite + - pymdownx.tabbed: + alternate_style: true - toc: permalink: true - pymdownx.emoji: