From 5dffa5d092211876edafa4c99dc682c894ce7ef9 Mon Sep 17 00:00:00 2001 From: Matthew O'Riordan Date: Mon, 29 Dec 2025 11:39:34 +0100 Subject: [PATCH 1/4] feat: add external browser mode for Chrome for Testing support Add serveWithExternalBrowser() that connects to an existing browser via CDP instead of launching Playwright's Chromium. Key features: - Connect to any browser with CDP enabled (Chrome for Testing, Chrome Beta, etc.) - Auto-launch browser if not running (with BROWSER_PATH env var) - Browser stays open after server stops (user manages lifecycle) - No extension required - direct CDP connection New files: - src/external-browser.ts - Core implementation - scripts/start-external-browser.ts - Startup script Use case: Local development with visible browser automation where you want to inspect results after automation completes. --- skills/dev-browser/SKILL.md | 45 ++- .../scripts/start-external-browser.ts | 80 ++++ skills/dev-browser/src/external-browser.ts | 350 ++++++++++++++++++ skills/dev-browser/src/index.ts | 7 + 4 files changed, 481 insertions(+), 1 deletion(-) create mode 100644 skills/dev-browser/scripts/start-external-browser.ts create mode 100644 skills/dev-browser/src/external-browser.ts diff --git a/skills/dev-browser/SKILL.md b/skills/dev-browser/SKILL.md index 21e4bd4..4bed812 100644 --- a/skills/dev-browser/SKILL.md +++ b/skills/dev-browser/SKILL.md @@ -15,7 +15,7 @@ Browser automation that maintains page state across script executions. Write sma ## Setup -Two modes available. Ask the user if unclear which to use. +Three modes available. Ask the user if unclear which to use. ### Standalone Mode (Default) @@ -27,6 +27,49 @@ Launches a new Chromium browser for fresh automation sessions. Add `--headless` flag if user requests it. **Wait for the `Ready` message before running scripts.** +### External Browser Mode + +Connects to an external browser (like Chrome for Testing) via Chrome DevTools Protocol (CDP). 
Use this when: + +- User wants to use a specific browser build (Chrome for Testing, Chrome Beta, etc.) +- User wants the browser to stay open after automation for manual inspection +- User wants visible browser automation for local development +- No extension installation required + +**Start the server:** + +```bash +cd skills/dev-browser && BROWSER_PATH="/path/to/chrome" npx tsx scripts/start-external-browser.ts & +``` + +**Environment variables:** +- `PORT` - HTTP API port (default: 9222) +- `CDP_PORT` - Browser's CDP port (default: 9223) +- `BROWSER_PATH` - Path to browser executable (enables auto-launch) +- `USER_DATA_DIR` - Browser profile directory (default: ~/.dev-browser-profile) +- `AUTO_LAUNCH` - Auto-launch browser if not running (default: true) + +**Example with Chrome for Testing (macOS):** + +```bash +BROWSER_PATH="/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing" \ +npx tsx scripts/start-external-browser.ts & +``` + +**Or start the browser manually first:** + +```bash +# Start Chrome for Testing with CDP enabled +"/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing" \ + --remote-debugging-port=9223 \ + --user-data-dir="$HOME/.chrome-for-testing-data" & + +# Then start the dev-browser server (no BROWSER_PATH needed) +cd skills/dev-browser && npx tsx scripts/start-external-browser.ts & +``` + +**Key difference:** When you stop the dev-browser server, the browser stays open. This is by design—you manage the browser lifecycle, dev-browser just connects to it. + ### Extension Mode Connects to user's existing Chrome browser. Use this when: diff --git a/skills/dev-browser/scripts/start-external-browser.ts b/skills/dev-browser/scripts/start-external-browser.ts new file mode 100644 index 0000000..bdd0e59 --- /dev/null +++ b/skills/dev-browser/scripts/start-external-browser.ts @@ -0,0 +1,80 @@ +/** + * Start dev-browser server connecting to an external browser via CDP. 
+ * + * This mode is ideal for: + * - Chrome for Testing or other specific browser builds + * - Development workflows where you want the browser visible + * - Keeping the browser open after automation for manual inspection + * + * Environment variables: + * PORT - HTTP API port (default: 9222) + * CDP_PORT - Browser's CDP port (default: 9223) + * BROWSER_PATH - Path to browser executable (for auto-launch) + * USER_DATA_DIR - Browser profile directory (default: ~/.dev-browser-profile) + * AUTO_LAUNCH - Whether to auto-launch browser if not running (default: true) + * + * Example with Chrome for Testing: + * BROWSER_PATH="/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing" \ + * npx tsx scripts/start-external-browser.ts + */ + +import { serveWithExternalBrowser } from "@/external-browser.js"; +import { mkdirSync } from "fs"; +import { join, dirname } from "path"; +import { fileURLToPath } from "url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const tmpDir = join(__dirname, "..", "tmp"); + +// Create tmp directory if it doesn't exist +console.log("Creating tmp directory..."); +mkdirSync(tmpDir, { recursive: true }); + +// Configuration from environment +const port = parseInt(process.env.PORT || "9222", 10); +const cdpPort = parseInt(process.env.CDP_PORT || "9223", 10); +const browserPath = process.env.BROWSER_PATH; +const userDataDir = process.env.USER_DATA_DIR || `${process.env.HOME}/.dev-browser-profile`; +const autoLaunch = process.env.AUTO_LAUNCH !== "false"; + +console.log("Starting dev-browser with external browser mode..."); +console.log(` HTTP API port: ${port}`); +console.log(` CDP port: ${cdpPort}`); +if (browserPath) { + console.log(` Browser path: ${browserPath}`); +} +console.log(` User data dir: ${userDataDir}`); +console.log(` Auto-launch: ${autoLaunch}`); +console.log(""); + +// Check if our HTTP API server is already running +console.log("Checking for existing servers..."); +try { + const res = 
await fetch(`http://localhost:${port}`, { + signal: AbortSignal.timeout(1000), + }); + if (res.ok) { + console.log(`Server already running on port ${port}`); + process.exit(0); + } +} catch { + // Server not running, continue to start +} + +const server = await serveWithExternalBrowser({ + port, + cdpPort, + browserPath, + userDataDir, + autoLaunch, +}); + +console.log(`\nDev browser server started`); +console.log(` WebSocket: ${server.wsEndpoint}`); +console.log(` Mode: ${server.mode}`); +console.log(` Tmp directory: ${tmpDir}`); +console.log(`\nReady`); +console.log(`\nPress Ctrl+C to stop (browser will remain open)`); + +// Keep the process running +await new Promise(() => {}); diff --git a/skills/dev-browser/src/external-browser.ts b/skills/dev-browser/src/external-browser.ts new file mode 100644 index 0000000..2ca0b5a --- /dev/null +++ b/skills/dev-browser/src/external-browser.ts @@ -0,0 +1,350 @@ +import express, { type Express, type Request, type Response } from "express"; +import { chromium, type Browser, type BrowserContext, type Page } from "playwright"; +import { spawn, execSync } from "child_process"; +import type { Socket } from "net"; +import type { + GetPageRequest, + GetPageResponse, + ListPagesResponse, + ServerInfoResponse, +} from "./types"; + +export interface ExternalBrowserOptions { + /** HTTP API port (default: 9222) */ + port?: number; + /** CDP port where external browser is listening (default: 9223) */ + cdpPort?: number; + /** Path to browser executable (for auto-launch) */ + browserPath?: string; + /** User data directory for browser profile (for auto-launch) */ + userDataDir?: string; + /** Whether to auto-launch browser if not running (default: true) */ + autoLaunch?: boolean; +} + +export interface ExternalBrowserServer { + wsEndpoint: string; + port: number; + mode: "external-browser"; + stop: () => Promise; +} + +/** + * Check if a browser is running on the specified CDP port + */ +async function isBrowserRunning(cdpPort: number): 
Promise { + try { + const res = await fetch(`http://127.0.0.1:${cdpPort}/json/version`, { + signal: AbortSignal.timeout(2000), + }); + return res.ok; + } catch { + return false; + } +} + +/** + * Get the CDP WebSocket endpoint from a running browser + */ +async function getCdpEndpoint(cdpPort: number, maxRetries = 60): Promise { + for (let i = 0; i < maxRetries; i++) { + try { + const res = await fetch(`http://127.0.0.1:${cdpPort}/json/version`, { + signal: AbortSignal.timeout(2000), + }); + if (res.ok) { + const data = (await res.json()) as { webSocketDebuggerUrl: string }; + return data.webSocketDebuggerUrl; + } + } catch { + // Browser not ready yet + } + await new Promise((resolve) => setTimeout(resolve, 500)); + } + throw new Error(`Browser did not start on port ${cdpPort} within ${maxRetries * 0.5}s`); +} + +/** + * Launch browser as a detached process (survives server shutdown) + */ +function launchBrowserDetached( + browserPath: string, + cdpPort: number, + userDataDir: string +): void { + const args = [ + `--remote-debugging-port=${cdpPort}`, + `--user-data-dir=${userDataDir}`, + "--no-first-run", + "--no-default-browser-check", + ]; + + console.log(`Launching browser: ${browserPath}`); + console.log(` CDP port: ${cdpPort}`); + console.log(` User data: ${userDataDir}`); + + const child = spawn(browserPath, args, { + detached: true, + stdio: "ignore", + }); + child.unref(); +} + +/** + * Helper to add timeout to promises + */ +function withTimeout(promise: Promise, ms: number, message: string): Promise { + return Promise.race([ + promise, + new Promise((_, reject) => + setTimeout(() => reject(new Error(`Timeout: ${message}`)), ms) + ), + ]); +} + +/** + * Serve dev-browser by connecting to an external browser via CDP. 
+ * + * This mode is ideal for: + * - Using Chrome for Testing or other specific browser builds + * - Keeping the browser open after automation (for manual inspection) + * - Development workflows where you want to see automation in a visible browser + * + * The browser lifecycle is managed externally - this server only connects/disconnects. + */ +export async function serveWithExternalBrowser( + options: ExternalBrowserOptions = {} +): Promise { + const port = options.port ?? 9222; + const cdpPort = options.cdpPort ?? 9223; + const autoLaunch = options.autoLaunch ?? true; + const browserPath = options.browserPath; + const userDataDir = options.userDataDir ?? `${process.env.HOME}/.dev-browser-profile`; + + // Validate port numbers + if (port < 1 || port > 65535) { + throw new Error(`Invalid port: ${port}. Must be between 1 and 65535`); + } + if (cdpPort < 1 || cdpPort > 65535) { + throw new Error(`Invalid cdpPort: ${cdpPort}. Must be between 1 and 65535`); + } + if (port === cdpPort) { + throw new Error("port and cdpPort must be different"); + } + + // Check if browser is running, optionally launch it + const running = await isBrowserRunning(cdpPort); + + if (!running) { + if (autoLaunch && browserPath) { + console.log(`Browser not running on port ${cdpPort}, launching...`); + launchBrowserDetached(browserPath, cdpPort, userDataDir); + } else if (autoLaunch && !browserPath) { + throw new Error( + `Browser not running on port ${cdpPort} and no browserPath provided for auto-launch. ` + + `Either start the browser manually with --remote-debugging-port=${cdpPort} or provide browserPath.` + ); + } else { + throw new Error( + `Browser not running on port ${cdpPort}. 
` + + `Start it with --remote-debugging-port=${cdpPort}` + ); + } + } else { + console.log(`Browser already running on port ${cdpPort}`); + } + + // Wait for CDP endpoint + console.log("Waiting for CDP endpoint..."); + const wsEndpoint = await getCdpEndpoint(cdpPort); + console.log(`CDP WebSocket endpoint: ${wsEndpoint}`); + + // Connect to the browser via CDP + console.log("Connecting to browser via CDP..."); + const browser: Browser = await chromium.connectOverCDP(`http://127.0.0.1:${cdpPort}`); + console.log("Connected to external browser"); + + // Get the default context (user's browsing context) + const contexts = browser.contexts(); + const context: BrowserContext = contexts[0] || await browser.newContext(); + + // Registry entry type for page tracking + interface PageEntry { + page: Page; + targetId: string; + } + + // Registry: name -> PageEntry + const registry = new Map(); + + // Helper to get CDP targetId for a page + async function getTargetId(page: Page): Promise { + const cdpSession = await context.newCDPSession(page); + try { + const { targetInfo } = await cdpSession.send("Target.getTargetInfo"); + return targetInfo.targetId; + } finally { + await cdpSession.detach(); + } + } + + // Express server for page management + const app: Express = express(); + app.use(express.json()); + + // GET / - server info + app.get("/", (_req: Request, res: Response) => { + const response: ServerInfoResponse & { mode: string } = { + wsEndpoint, + mode: "external-browser", + }; + res.json(response); + }); + + // GET /pages - list all pages + app.get("/pages", (_req: Request, res: Response) => { + const response: ListPagesResponse = { + pages: Array.from(registry.keys()), + }; + res.json(response); + }); + + // POST /pages - get or create page + app.post("/pages", async (req: Request, res: Response) => { + const body = req.body as GetPageRequest; + const { name } = body; + + if (!name || typeof name !== "string") { + res.status(400).json({ error: "name is required and 
must be a string" }); + return; + } + + if (name.length === 0) { + res.status(400).json({ error: "name cannot be empty" }); + return; + } + + if (name.length > 256) { + res.status(400).json({ error: "name must be 256 characters or less" }); + return; + } + + // Check if page already exists + let entry = registry.get(name); + if (!entry) { + // Create new page in the context (with timeout to prevent hangs) + const page = await withTimeout(context.newPage(), 30000, "Page creation timed out after 30s"); + const targetId = await getTargetId(page); + entry = { page, targetId }; + registry.set(name, entry); + + // Clean up registry when page is closed (e.g., user clicks X) + page.on("close", () => { + registry.delete(name); + }); + } + + const response: GetPageResponse = { wsEndpoint, name, targetId: entry.targetId }; + res.json(response); + }); + + // DELETE /pages/:name - close a page + app.delete("/pages/:name", async (req: Request<{ name: string }>, res: Response) => { + const name = decodeURIComponent(req.params.name); + const entry = registry.get(name); + + if (entry) { + await entry.page.close(); + registry.delete(name); + res.json({ success: true }); + return; + } + + res.status(404).json({ error: "page not found" }); + }); + + // Start the server + const server = app.listen(port, () => { + console.log(`HTTP API server running on port ${port}`); + }); + + // Track active connections for clean shutdown + const connections = new Set(); + server.on("connection", (socket: Socket) => { + connections.add(socket); + socket.on("close", () => connections.delete(socket)); + }); + + // Track if cleanup has been called to avoid double cleanup + let cleaningUp = false; + + // Cleanup function - disconnects but does NOT close the browser + const cleanup = async () => { + if (cleaningUp) return; + cleaningUp = true; + + console.log("\nShutting down..."); + + // Close all active HTTP connections + for (const socket of connections) { + socket.destroy(); + } + connections.clear(); 
+ + // Close managed pages (pages we created, not user's existing tabs) + for (const entry of registry.values()) { + try { + await entry.page.close(); + } catch { + // Page might already be closed + } + } + registry.clear(); + + // Disconnect from browser (does NOT close it) + try { + await browser.close(); + } catch { + // Already disconnected + } + + server.close(); + console.log("Server stopped. Browser remains open."); + }; + + // Signal handlers + const signals = ["SIGINT", "SIGTERM", "SIGHUP"] as const; + + const signalHandler = async () => { + await cleanup(); + process.exit(0); + }; + + const errorHandler = async (err: unknown) => { + console.error("Unhandled error:", err); + await cleanup(); + process.exit(1); + }; + + // Register handlers + signals.forEach((sig) => process.on(sig, signalHandler)); + process.on("uncaughtException", errorHandler); + process.on("unhandledRejection", errorHandler); + + // Helper to remove all handlers + const removeHandlers = () => { + signals.forEach((sig) => process.off(sig, signalHandler)); + process.off("uncaughtException", errorHandler); + process.off("unhandledRejection", errorHandler); + }; + + return { + wsEndpoint, + port, + mode: "external-browser", + async stop() { + removeHandlers(); + await cleanup(); + }, + }; +} diff --git a/skills/dev-browser/src/index.ts b/skills/dev-browser/src/index.ts index 24fd619..d94cf8f 100644 --- a/skills/dev-browser/src/index.ts +++ b/skills/dev-browser/src/index.ts @@ -13,6 +13,13 @@ import type { export type { ServeOptions, GetPageResponse, ListPagesResponse, ServerInfoResponse }; +// Re-export external browser mode +export { + serveWithExternalBrowser, + type ExternalBrowserOptions, + type ExternalBrowserServer, +} from "./external-browser.js"; + export interface DevBrowserServer { wsEndpoint: string; port: number; From d2b6c45d2a18dfd1828b1b858cc8ed3ed384c56f Mon Sep 17 00:00:00 2001 From: Matthew O'Riordan Date: Tue, 30 Dec 2025 11:50:17 +0100 Subject: [PATCH 2/4] feat: add 
multi-agent concurrency support with dynamic port allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multiple AI agents run browser automation tasks in parallel, they need separate HTTP API ports while potentially sharing the same browser instance. This adds automatic port allocation to avoid conflicts. Key changes: - Add port-manager.ts for dynamic port allocation (range 9222-9300) - Server tracking via ~/.dev-browser/active-servers.json - PORT=XXXX output for agent discovery - Config file support at ~/.dev-browser/config.json - Update both standalone and external browser modes Architecture: Agent 1 → server (port 9222) ┐ Agent 2 → server (port 9224) ├→ Shared Browser (CDP 9223) Agent 3 → server (port 9226) ┘ See docs/CONCURRENCY.md for design decisions and usage examples. Addresses concerns raised in PR #15 about single-point congestion. --- skills/dev-browser/docs/CONCURRENCY.md | 177 ++++++++++++++ .../scripts/start-external-browser.ts | 56 +++-- skills/dev-browser/scripts/start-server.ts | 75 +++--- skills/dev-browser/src/external-browser.ts | 36 ++- skills/dev-browser/src/index.ts | 32 ++- skills/dev-browser/src/port-manager.ts | 226 ++++++++++++++++++ 6 files changed, 539 insertions(+), 63 deletions(-) create mode 100644 skills/dev-browser/docs/CONCURRENCY.md create mode 100644 skills/dev-browser/src/port-manager.ts diff --git a/skills/dev-browser/docs/CONCURRENCY.md b/skills/dev-browser/docs/CONCURRENCY.md new file mode 100644 index 0000000..37f8267 --- /dev/null +++ b/skills/dev-browser/docs/CONCURRENCY.md @@ -0,0 +1,177 @@ +# Multi-Agent Concurrency Support + +This document explains how dev-browser supports multiple concurrent agents and the design decisions behind the implementation. + +## The Problem + +When multiple AI agents (e.g., Claude Code sub-agents) run browser automation tasks in parallel, they need to avoid conflicts. 
The original dev-browser design assumed a single server on a fixed port, which creates a bottleneck: + +> "dev-browser is in fact a single point of congestion now, nullifying the advantages of dev browser" +> — [PR #15 discussion](https://github.com/SawyerHood/dev-browser/pull/15#issuecomment-3698722432) + +## Solution: Dynamic Port Allocation + +Each agent automatically gets its own HTTP API server on a unique port: + +``` +Agent 1 ──► server (port 9222) ──┐ +Agent 2 ──► server (port 9224) ──┼──► Shared Browser (CDP 9223) +Agent 3 ──► server (port 9226) ──┘ +``` + +### How It Works + +1. **Port Auto-Assignment**: When `port` is not specified, the server finds an available port in the configured range (default: 9222-9300, step 2) + +2. **Port Discovery**: Server outputs `PORT=XXXX` to stdout, which agents parse to know which port to connect to + +3. **Server Tracking**: Active servers are tracked in `~/.dev-browser/active-servers.json` for coordination + +4. **Shared Browser**: In external browser mode, all servers connect to the same browser via CDP, minimizing resource usage + +## Design Decisions + +### Options Considered + +#### Option 1: Manual Port Assignment (Rejected) + +From [PR #15](https://github.com/SawyerHood/dev-browser/pull/15), the initial proposal was to add `--port` and `--cdp-port` CLI flags for manual assignment. + +**Why rejected**: Requires agents to coordinate port selection, adds complexity to agent implementation, and creates potential for conflicts. + +#### Option 2: Singleton Server with Named Pages (Rejected) + +Have one persistent server handling all agents, using page names for isolation. + +**Why rejected**: Incompatible with the plugin architecture where each agent spawns its own server process. Also creates a true single point of failure. + +#### Option 3: Dynamic Port Allocation (Chosen) + +Servers automatically discover and claim available ports. 
+ +**Why chosen**: +- Zero configuration required +- Agents don't need to coordinate +- Works with existing plugin architecture +- Each agent is isolated (failure doesn't affect others) +- Memory overhead is acceptable (~140MB per server) + +### Memory Considerations + +Each dev-browser server uses approximately: +- **Node.js + Playwright + Express**: ~140MB +- **Browser (if standalone mode)**: ~300MB additional + +In external browser mode, multiple servers share one browser, making the per-agent overhead just ~140MB. + +## Configuration + +Create `~/.dev-browser/config.json` to customize behavior: + +```json +{ + "portRange": { + "start": 9222, + "end": 9300, + "step": 2 + }, + "cdpPort": 9223 +} +``` + +| Option | Default | Description | +|--------|---------|-------------| +| `portRange.start` | 9222 | First port to try for HTTP API | +| `portRange.end` | 9300 | Last port to try | +| `portRange.step` | 2 | Port increment (avoids CDP port collision) | +| `cdpPort` | 9223 | Chrome DevTools Protocol port | + +## Usage Examples + +### Multiple Agents (External Browser Mode) + +```bash +# Terminal 1: First agent (auto-launches the browser via BROWSER_PATH if it is not already running) +BROWSER_PATH="/path/to/chrome" npx tsx scripts/start-external-browser.ts +# Output: PORT=9222 + +# Terminal 2: Second agent +npx tsx scripts/start-external-browser.ts +# Output: PORT=9224 + +# Terminal 3: Third agent +npx tsx scripts/start-external-browser.ts +# Output: PORT=9226 + +# All agents share the same browser on CDP port 9223 +``` + +### Multiple Agents (Standalone Mode) + +```bash +# Terminal 1: First agent launches its own browser +npx tsx scripts/start-server.ts +# Output: PORT=9222 + +# Terminal 2: Second agent launches separate browser +npx tsx scripts/start-server.ts +# Output: PORT=9224 +``` + +### Programmatic Usage + +```typescript +import { serve, serveWithExternalBrowser } from "dev-browser"; + +// Port is automatically assigned +const server1 = await serve(); // Gets port 9222 +const server2 = await serve(); // Gets 
port 9224 + +console.log(`Server 1 on port ${server1.port}`); +console.log(`Server 2 on port ${server2.port}`); + +// Or with external browser +const external1 = await serveWithExternalBrowser(); +const external2 = await serveWithExternalBrowser(); +// Both connect to same browser on CDP 9223 +``` + +## Troubleshooting + +### "No available ports in range" + +Too many servers running. Check active servers: + +```bash +cat ~/.dev-browser/active-servers.json +``` + +Clean up stale entries (servers that crashed): + +```bash +rm ~/.dev-browser/active-servers.json +``` + +### Port Conflicts + +If a specific port is required, set `PORT` environment variable: + +```bash +PORT=9250 npx tsx scripts/start-external-browser.ts +``` + +### Checking Server Status + +```bash +# List all active servers +cat ~/.dev-browser/active-servers.json + +# Test a specific server +curl http://localhost:9222/ +# Returns: {"wsEndpoint":"ws://...","mode":"external-browser","port":9222} +``` + +## References + +- [PR #15: Multi-port support discussion](https://github.com/SawyerHood/dev-browser/pull/15) +- [PR #20: External browser mode](https://github.com/SawyerHood/dev-browser/pull/20) diff --git a/skills/dev-browser/scripts/start-external-browser.ts b/skills/dev-browser/scripts/start-external-browser.ts index bdd0e59..18484a6 100644 --- a/skills/dev-browser/scripts/start-external-browser.ts +++ b/skills/dev-browser/scripts/start-external-browser.ts @@ -5,17 +5,35 @@ * - Chrome for Testing or other specific browser builds * - Development workflows where you want the browser visible * - Keeping the browser open after automation for manual inspection + * - Running multiple agents concurrently (each gets its own port automatically) * * Environment variables: - * PORT - HTTP API port (default: 9222) + * PORT - HTTP API port (default: auto-assigned from 9222-9300) * CDP_PORT - Browser's CDP port (default: 9223) * BROWSER_PATH - Path to browser executable (for auto-launch) * USER_DATA_DIR - Browser 
profile directory (default: ~/.dev-browser-profile) * AUTO_LAUNCH - Whether to auto-launch browser if not running (default: true) * + * Configuration file: ~/.dev-browser/config.json + * { + * "portRange": { "start": 9222, "end": 9300, "step": 2 }, + * "cdpPort": 9223 + * } + * * Example with Chrome for Testing: * BROWSER_PATH="/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing" \ * npx tsx scripts/start-external-browser.ts + * + * Multi-agent usage: + * # Terminal 1: First agent gets port 9222 + * npx tsx scripts/start-external-browser.ts + * # Output: PORT=9222 + * + * # Terminal 2: Second agent gets port 9224 + * npx tsx scripts/start-external-browser.ts + * # Output: PORT=9224 + * + * # Both agents share the same browser on CDP port 9223 */ import { serveWithExternalBrowser } from "@/external-browser.js"; @@ -27,40 +45,26 @@ const __dirname = dirname(fileURLToPath(import.meta.url)); const tmpDir = join(__dirname, "..", "tmp"); // Create tmp directory if it doesn't exist -console.log("Creating tmp directory..."); mkdirSync(tmpDir, { recursive: true }); -// Configuration from environment -const port = parseInt(process.env.PORT || "9222", 10); -const cdpPort = parseInt(process.env.CDP_PORT || "9223", 10); +// Configuration from environment (PORT is optional - will be auto-assigned) +const port = process.env.PORT ? parseInt(process.env.PORT, 10) : undefined; +const cdpPort = process.env.CDP_PORT ? parseInt(process.env.CDP_PORT, 10) : undefined; const browserPath = process.env.BROWSER_PATH; const userDataDir = process.env.USER_DATA_DIR || `${process.env.HOME}/.dev-browser-profile`; const autoLaunch = process.env.AUTO_LAUNCH !== "false"; console.log("Starting dev-browser with external browser mode..."); -console.log(` HTTP API port: ${port}`); -console.log(` CDP port: ${cdpPort}`); +console.log(` HTTP API port: ${port ?? "auto (dynamic)"}`); +console.log(` CDP port: ${cdpPort ?? 
"from config (default: 9223)"}`); if (browserPath) { console.log(` Browser path: ${browserPath}`); } console.log(` User data dir: ${userDataDir}`); console.log(` Auto-launch: ${autoLaunch}`); +console.log(` Config: ~/.dev-browser/config.json`); console.log(""); -// Check if our HTTP API server is already running -console.log("Checking for existing servers..."); -try { - const res = await fetch(`http://localhost:${port}`, { - signal: AbortSignal.timeout(1000), - }); - if (res.ok) { - console.log(`Server already running on port ${port}`); - process.exit(0); - } -} catch { - // Server not running, continue to start -} - const server = await serveWithExternalBrowser({ port, cdpPort, @@ -69,12 +73,16 @@ const server = await serveWithExternalBrowser({ autoLaunch, }); -console.log(`\nDev browser server started`); +console.log(""); +console.log(`Dev browser server started`); console.log(` WebSocket: ${server.wsEndpoint}`); +console.log(` HTTP API: http://localhost:${server.port}`); console.log(` Mode: ${server.mode}`); console.log(` Tmp directory: ${tmpDir}`); -console.log(`\nReady`); -console.log(`\nPress Ctrl+C to stop (browser will remain open)`); +console.log(""); +console.log("Ready"); +console.log(""); +console.log("Press Ctrl+C to stop (browser will remain open)"); // Keep the process running await new Promise(() => {}); diff --git a/skills/dev-browser/scripts/start-server.ts b/skills/dev-browser/scripts/start-server.ts index e130a27..ccc2135 100644 --- a/skills/dev-browser/scripts/start-server.ts +++ b/skills/dev-browser/scripts/start-server.ts @@ -1,3 +1,31 @@ +/** + * Start dev-browser server in standalone mode (launches Playwright Chromium). 
+ * + * This mode: + * - Launches a dedicated Playwright Chromium browser + * - Owns the browser lifecycle (closes when server stops) + * - Supports multiple concurrent agents via dynamic port allocation + * + * Environment variables: + * PORT - HTTP API port (default: auto-assigned from 9222-9300) + * HEADLESS - Run browser in headless mode (default: false) + * + * Configuration file: ~/.dev-browser/config.json + * { + * "portRange": { "start": 9222, "end": 9300, "step": 2 }, + * "cdpPort": 9223 + * } + * + * Multi-agent usage: + * # Terminal 1: First agent gets port 9222, launches browser + * npx tsx scripts/start-server.ts + * # Output: PORT=9222 + * + * # Terminal 2: Second agent gets port 9224, launches separate browser + * npx tsx scripts/start-server.ts + * # Output: PORT=9224 + */ + import { serve } from "@/index.js"; import { execSync } from "child_process"; import { mkdirSync, existsSync, readdirSync } from "fs"; @@ -9,9 +37,7 @@ const tmpDir = join(__dirname, "..", "tmp"); const profileDir = join(__dirname, "..", "profiles"); // Create tmp and profile directories if they don't exist -console.log("Creating tmp directory..."); mkdirSync(tmpDir, { recursive: true }); -console.log("Creating profiles directory..."); mkdirSync(profileDir, { recursive: true }); // Install Playwright browsers if not already installed @@ -72,46 +98,33 @@ try { console.log("You may need to run: npx playwright install chromium"); } -// Check if server is already running -console.log("Checking for existing servers..."); -try { - const res = await fetch("http://localhost:9222", { - signal: AbortSignal.timeout(1000), - }); - if (res.ok) { - console.log("Server already running on port 9222"); - process.exit(0); - } -} catch { - // Server not running, continue to start -} +// Configuration from environment (PORT is optional - will be auto-assigned) +const port = process.env.PORT ? 
parseInt(process.env.PORT, 10) : undefined; +const headless = process.env.HEADLESS === "true"; -// Clean up stale CDP port if HTTP server isn't running (crash recovery) -// This handles the case where Node crashed but Chrome is still running on 9223 -try { - const pid = execSync("lsof -ti:9223", { encoding: "utf-8" }).trim(); - if (pid) { - console.log(`Cleaning up stale Chrome process on CDP port 9223 (PID: ${pid})`); - execSync(`kill -9 ${pid}`); - } -} catch { - // No process on CDP port, which is expected -} +console.log(""); +console.log("Starting dev browser server (standalone mode)..."); +console.log(` HTTP API port: ${port ?? "auto (dynamic)"}`); +console.log(` Headless: ${headless}`); +console.log(` Config: ~/.dev-browser/config.json`); +console.log(""); -console.log("Starting dev browser server..."); -const headless = process.env.HEADLESS === "true"; const server = await serve({ - port: 9222, + port, headless, profileDir, }); +console.log(""); console.log(`Dev browser server started`); console.log(` WebSocket: ${server.wsEndpoint}`); +console.log(` HTTP API: http://localhost:${server.port}`); console.log(` Tmp directory: ${tmpDir}`); console.log(` Profile directory: ${profileDir}`); -console.log(`\nReady`); -console.log(`\nPress Ctrl+C to stop`); +console.log(""); +console.log("Ready"); +console.log(""); +console.log("Press Ctrl+C to stop"); // Keep the process running await new Promise(() => {}); diff --git a/skills/dev-browser/src/external-browser.ts b/skills/dev-browser/src/external-browser.ts index 2ca0b5a..534b99f 100644 --- a/skills/dev-browser/src/external-browser.ts +++ b/skills/dev-browser/src/external-browser.ts @@ -1,6 +1,6 @@ import express, { type Express, type Request, type Response } from "express"; import { chromium, type Browser, type BrowserContext, type Page } from "playwright"; -import { spawn, execSync } from "child_process"; +import { spawn } from "child_process"; import type { Socket } from "net"; import type { GetPageRequest, @@ 
-8,9 +8,20 @@ import type { ListPagesResponse, ServerInfoResponse, } from "./types"; +import { + loadConfig, + findAvailablePort, + registerServer, + unregisterServer, + outputPortForDiscovery, +} from "./port-manager.js"; export interface ExternalBrowserOptions { - /** HTTP API port (default: 9222) */ + /** + * HTTP API port. If not specified, a port is automatically assigned + * from the configured range (default: 9222-9300, step 2). + * This enables multiple agents to run concurrently. + */ port?: number; /** CDP port where external browser is listening (default: 9223) */ cdpPort?: number; @@ -115,8 +126,11 @@ function withTimeout(promise: Promise, ms: number, message: string): Promi export async function serveWithExternalBrowser( options: ExternalBrowserOptions = {} ): Promise { - const port = options.port ?? 9222; - const cdpPort = options.cdpPort ?? 9223; + const config = loadConfig(); + + // Use dynamic port allocation if port not specified + const port = options.port ?? await findAvailablePort(config); + const cdpPort = options.cdpPort ?? config.cdpPort; const autoLaunch = options.autoLaunch ?? true; const browserPath = options.browserPath; const userDataDir = options.userDataDir ?? `${process.env.HOME}/.dev-browser-profile`; @@ -268,6 +282,12 @@ export async function serveWithExternalBrowser( console.log(`HTTP API server running on port ${port}`); }); + // Register this server for multi-agent coordination + registerServer(port, process.pid); + + // Output port for agent discovery (agents parse this to know which port to connect to) + outputPortForDiscovery(port); + // Track active connections for clean shutdown const connections = new Set(); server.on("connection", (socket: Socket) => { @@ -309,7 +329,13 @@ export async function serveWithExternalBrowser( } server.close(); - console.log("Server stopped. Browser remains open."); + + // Unregister this server + const remainingServers = unregisterServer(port); + console.log( + `Server stopped. 
Browser remains open. ` + + `${remainingServers} other server(s) still running.` + ); }; // Signal handlers diff --git a/skills/dev-browser/src/index.ts b/skills/dev-browser/src/index.ts index d94cf8f..49d25f8 100644 --- a/skills/dev-browser/src/index.ts +++ b/skills/dev-browser/src/index.ts @@ -10,6 +10,13 @@ import type { ListPagesResponse, ServerInfoResponse, } from "./types"; +import { + loadConfig, + findAvailablePort, + registerServer, + unregisterServer, + outputPortForDiscovery, +} from "./port-manager.js"; export type { ServeOptions, GetPageResponse, ListPagesResponse, ServerInfoResponse }; @@ -20,6 +27,13 @@ export { type ExternalBrowserServer, } from "./external-browser.js"; +// Re-export port management utilities +export { + loadConfig, + findAvailablePort, + type DevBrowserConfig, +} from "./port-manager.js"; + export interface DevBrowserServer { wsEndpoint: string; port: number; @@ -59,9 +73,12 @@ function withTimeout(promise: Promise, ms: number, message: string): Promi } export async function serve(options: ServeOptions = {}): Promise { - const port = options.port ?? 9222; + const config = loadConfig(); + + // Use dynamic port allocation if port not specified + const port = options.port ?? await findAvailablePort(config); const headless = options.headless ?? false; - const cdpPort = options.cdpPort ?? 9223; + const cdpPort = options.cdpPort ?? 
config.cdpPort; const profileDir = options.profileDir; // Validate port numbers @@ -196,6 +213,12 @@ export async function serve(options: ServeOptions = {}): Promise(); server.on("connection", (socket: Socket) => { @@ -237,7 +260,10 @@ export async function serve(options: ServeOptions = {}): Promise { + // Check default binding (IPv6 on most systems, which Express uses) + const defaultAvailable = await new Promise((resolve) => { + const server = createServer(); + server.once("error", () => resolve(false)); + server.once("listening", () => { + server.close(() => resolve(true)); + }); + server.listen(port); + }); + + if (!defaultAvailable) return false; + + // Also check IPv4 for completeness + const ipv4Available = await new Promise((resolve) => { + const server = createServer(); + server.once("error", () => resolve(false)); + server.once("listening", () => { + server.close(() => resolve(true)); + }); + server.listen(port, "0.0.0.0"); + }); + + return ipv4Available; +} + +/** + * Find an available port in the configured range. + * @throws Error if no ports are available + */ +export async function findAvailablePort(config?: DevBrowserConfig): Promise { + const { portRange } = config || loadConfig(); + const { start, end, step } = portRange; + + for (let port = start; port < end; port += step) { + if (await isPortAvailable(port)) { + return port; + } + } + + throw new Error( + `No available ports in range ${start}-${end} (step ${step}). ` + + `Too many dev-browser servers may be running. ` + + `Check ~/.dev-browser/active-servers.json for active servers.` + ); +} + +/** + * Register a server for coordination tracking. + * This helps coordinate shutdown behavior across multiple servers. 
+ */ +export function registerServer(port: number, pid: number): void { + mkdirSync(CONFIG_DIR, { recursive: true }); + let servers: Record = {}; + + try { + if (existsSync(SERVERS_FILE)) { + servers = JSON.parse(readFileSync(SERVERS_FILE, "utf-8")); + } + } catch { + servers = {}; + } + + // Clean up stale entries (processes that no longer exist) + for (const [portStr, serverPid] of Object.entries(servers)) { + try { + process.kill(serverPid as number, 0); // Check if process exists + } catch { + delete servers[parseInt(portStr)]; + } + } + + servers[port] = pid; + writeFileSync(SERVERS_FILE, JSON.stringify(servers, null, 2)); +} + +/** + * Unregister a server and return the count of remaining servers. + */ +export function unregisterServer(port: number): number { + let servers: Record = {}; + + try { + if (existsSync(SERVERS_FILE)) { + servers = JSON.parse(readFileSync(SERVERS_FILE, "utf-8")); + } + } catch { + servers = {}; + } + + delete servers[port]; + + // Clean up stale entries + for (const [portStr, serverPid] of Object.entries(servers)) { + try { + process.kill(serverPid as number, 0); + } catch { + delete servers[parseInt(portStr)]; + } + } + + writeFileSync(SERVERS_FILE, JSON.stringify(servers, null, 2)); + return Object.keys(servers).length; +} + +/** + * Get the count of currently active servers. + */ +export function getActiveServerCount(): number { + try { + if (!existsSync(SERVERS_FILE)) { + return 0; + } + + const servers: Record = JSON.parse( + readFileSync(SERVERS_FILE, "utf-8") + ); + + // Count only servers that are still running + let count = 0; + for (const serverPid of Object.values(servers)) { + try { + process.kill(serverPid as number, 0); + count++; + } catch { + // Process no longer exists + } + } + return count; + } catch { + return 0; + } +} + +/** + * Output the assigned port for agent discovery. + * Agents parse this output to know which port to connect to. 
+ * + * Format: PORT=XXXX + */ +export function outputPortForDiscovery(port: number): void { + console.log(`PORT=${port}`); +} From 976f67331369ee052371ba7dc28a1dac57b8aba1 Mon Sep 17 00:00:00 2001 From: Matthew O'Riordan Date: Tue, 30 Dec 2025 12:03:32 +0100 Subject: [PATCH 3/4] feat: add smart crash recovery with orphan browser cleanup When a dev-browser server crashes, its Chrome browser may still be running on the CDP port. This adds smart cleanup to detect and terminate orphaned browsers before launching new ones. Key changes: - Enhanced ServerInfo structure to track CDP port and mode - Added detectOrphanedBrowsers() to find browsers with no registered server - Added cleanupOrphanedBrowsers() to safely terminate orphans - Standalone mode now cleans orphans on startup (before launching browser) - External mode tracks CDP port but doesn't clean (browser is intentionally external) This restores crash recovery functionality that was previously in start-server.ts, but in a smarter way that respects multi-agent scenarios. 
--- skills/dev-browser/src/external-browser.ts | 4 +- skills/dev-browser/src/index.ts | 17 +- skills/dev-browser/src/port-manager.ts | 251 ++++++++++++++++----- 3 files changed, 217 insertions(+), 55 deletions(-) diff --git a/skills/dev-browser/src/external-browser.ts b/skills/dev-browser/src/external-browser.ts index 534b99f..6254078 100644 --- a/skills/dev-browser/src/external-browser.ts +++ b/skills/dev-browser/src/external-browser.ts @@ -282,8 +282,8 @@ export async function serveWithExternalBrowser( console.log(`HTTP API server running on port ${port}`); }); - // Register this server for multi-agent coordination - registerServer(port, process.pid); + // Register this server for multi-agent coordination (external mode doesn't own the browser) + registerServer(port, process.pid, { cdpPort, mode: "external" }); // Output port for agent discovery (agents parse this to know which port to connect to) outputPortForDiscovery(port); diff --git a/skills/dev-browser/src/index.ts b/skills/dev-browser/src/index.ts index 49d25f8..85b5f46 100644 --- a/skills/dev-browser/src/index.ts +++ b/skills/dev-browser/src/index.ts @@ -16,6 +16,7 @@ import { registerServer, unregisterServer, outputPortForDiscovery, + cleanupOrphanedBrowsers, } from "./port-manager.js"; export type { ServeOptions, GetPageResponse, ListPagesResponse, ServerInfoResponse }; @@ -31,7 +32,11 @@ export { export { loadConfig, findAvailablePort, + cleanupOrphanedBrowsers, + detectOrphanedBrowsers, type DevBrowserConfig, + type ServerInfo, + type OrphanedBrowser, } from "./port-manager.js"; export interface DevBrowserServer { @@ -101,6 +106,14 @@ export async function serve(options: ServeOptions = {}): Promise 0) { + // Give the OS a moment to release the port + await new Promise((resolve) => setTimeout(resolve, 500)); + } + console.log("Launching browser with persistent context..."); // Launch persistent context - this persists cookies, localStorage, cache, etc. 
@@ -213,8 +226,8 @@ export async function serve(options: ServeOptions = {}): Promise = {}; +function processExists(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + +/** + * Load the servers file, handling both old format (pid only) and new format (ServerInfo). + */ +function loadServersFile(): Record { + if (!existsSync(SERVERS_FILE)) { + return {}; + } try { - if (existsSync(SERVERS_FILE)) { - servers = JSON.parse(readFileSync(SERVERS_FILE, "utf-8")); + const content = readFileSync(SERVERS_FILE, "utf-8"); + const data = JSON.parse(content); + + // Handle migration from old format { port: pid } to new format { port: ServerInfo } + const servers: Record = {}; + for (const [port, value] of Object.entries(data)) { + if (typeof value === "number") { + // Old format: migrate to new format + servers[port] = { + pid: value, + mode: "standalone", // Assume standalone for old entries + startedAt: new Date().toISOString(), + }; + } else { + // New format + servers[port] = value as ServerInfo; + } } + return servers; } catch { - servers = {}; + return {}; } +} - // Clean up stale entries (processes that no longer exist) - for (const [portStr, serverPid] of Object.entries(servers)) { - try { - process.kill(serverPid as number, 0); // Check if process exists - } catch { - delete servers[parseInt(portStr)]; +/** + * Save the servers file. + */ +function saveServersFile(servers: Record): void { + mkdirSync(CONFIG_DIR, { recursive: true }); + writeFileSync(SERVERS_FILE, JSON.stringify(servers, null, 2)); +} + +/** + * Clean up stale entries from servers file (processes that no longer exist). 
+ */ +function cleanupStaleEntries(servers: Record): Record { + const cleaned: Record = {}; + for (const [port, info] of Object.entries(servers)) { + if (processExists(info.pid)) { + cleaned[port] = info; } } + return cleaned; +} - servers[port] = pid; - writeFileSync(SERVERS_FILE, JSON.stringify(servers, null, 2)); +/** + * Register a server for coordination tracking. + * This helps coordinate shutdown behavior and orphan detection. + */ +export function registerServer( + port: number, + pid: number, + options?: { + cdpPort?: number; + browserPid?: number; + mode?: "standalone" | "external"; + } +): void { + mkdirSync(CONFIG_DIR, { recursive: true }); + + let servers = loadServersFile(); + servers = cleanupStaleEntries(servers); + + servers[port.toString()] = { + pid, + cdpPort: options?.cdpPort, + browserPid: options?.browserPid, + mode: options?.mode ?? "standalone", + startedAt: new Date().toISOString(), + }; + + saveServersFile(servers); } /** * Unregister a server and return the count of remaining servers. */ export function unregisterServer(port: number): number { - let servers: Record = {}; + let servers = loadServersFile(); + delete servers[port.toString()]; + servers = cleanupStaleEntries(servers); + saveServersFile(servers); + return Object.keys(servers).length; +} +/** + * Get the count of currently active servers. + */ +export function getActiveServerCount(): number { + const servers = loadServersFile(); + const cleaned = cleanupStaleEntries(servers); + return Object.keys(cleaned).length; +} + +/** + * Get process ID listening on a specific port (macOS/Linux). + * Returns null if no process is listening or on error. 
+ */ +function getProcessOnPort(port: number): number | null { try { - if (existsSync(SERVERS_FILE)) { - servers = JSON.parse(readFileSync(SERVERS_FILE, "utf-8")); + // Works on macOS and Linux + const output = execSync(`lsof -ti:${port}`, { + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + }).trim(); + + if (output) { + // May return multiple PIDs, take the first one + const firstLine = output.split("\n")[0] ?? ""; + const pid = parseInt(firstLine, 10); + return isNaN(pid) ? null : pid; } } catch { - servers = {}; + // No process on port or lsof not available } + return null; +} - delete servers[port]; +/** + * Information about an orphaned browser. + */ +export interface OrphanedBrowser { + cdpPort: number; + pid: number; +} - // Clean up stale entries - for (const [portStr, serverPid] of Object.entries(servers)) { - try { - process.kill(serverPid as number, 0); - } catch { - delete servers[parseInt(portStr)]; +/** + * Detect orphaned browsers - browsers running on CDP ports with no registered server. + * + * This handles crash recovery: if a server crashed without cleanup, its browser + * may still be running. This function identifies such orphans. + * + * @param cdpPorts - CDP ports to check (default: common ports 9223, 9225, etc.) + * @returns List of orphaned browsers + */ +export function detectOrphanedBrowsers(cdpPorts?: number[]): OrphanedBrowser[] { + const servers = loadServersFile(); + const cleanedServers = cleanupStaleEntries(servers); + + // Get CDP ports that have active servers + const activeCdpPorts = new Set(); + for (const info of Object.values(cleanedServers)) { + if (info.cdpPort) { + activeCdpPorts.add(info.cdpPort); } } - writeFileSync(SERVERS_FILE, JSON.stringify(servers, null, 2)); - return Object.keys(servers).length; + // Default ports to check if not specified + const portsToCheck = cdpPorts ?? 
[9223, 9225, 9227, 9229, 9231]; + + const orphans: OrphanedBrowser[] = []; + for (const cdpPort of portsToCheck) { + // Skip if an active server claims this CDP port + if (activeCdpPorts.has(cdpPort)) { + continue; + } + + // Check if something is running on this port + const pid = getProcessOnPort(cdpPort); + if (pid !== null) { + orphans.push({ cdpPort, pid }); + } + } + + return orphans; } /** - * Get the count of currently active servers. + * Clean up orphaned browsers from previous crashed sessions. + * + * This is useful for standalone mode where the server owns the browser lifecycle. + * Only kills processes that are truly orphaned (no registered server). + * + * @param cdpPorts - CDP ports to check for orphans + * @returns Number of orphaned browsers cleaned up */ -export function getActiveServerCount(): number { - try { - if (!existsSync(SERVERS_FILE)) { - return 0; - } +export function cleanupOrphanedBrowsers(cdpPorts?: number[]): number { + const orphans = detectOrphanedBrowsers(cdpPorts); + let cleaned = 0; - const servers: Record = JSON.parse( - readFileSync(SERVERS_FILE, "utf-8") - ); - - // Count only servers that are still running - let count = 0; - for (const serverPid of Object.values(servers)) { - try { - process.kill(serverPid as number, 0); - count++; - } catch { - // Process no longer exists - } + for (const orphan of orphans) { + try { + console.log( + `Cleaning up orphaned browser on CDP port ${orphan.cdpPort} (PID: ${orphan.pid})` + ); + process.kill(orphan.pid, "SIGTERM"); + cleaned++; + } catch (err) { + console.warn( + `Warning: Could not kill orphaned process ${orphan.pid}: ${err}` + ); } - return count; - } catch { - return 0; } + + return cleaned; } /** From 9e11a6db65373ebaa12bfe9a86bb3601813e4374 Mon Sep 17 00:00:00 2001 From: Matthew O'Riordan Date: Tue, 30 Dec 2025 23:36:16 +0100 Subject: [PATCH 4/4] feat: add unified browser mode auto-detection with config file - Add ~/.dev-browser/config.json for browser configuration - 
Auto-detect Chrome for Testing on macOS/Linux/Windows - Add --standalone flag to force Playwright mode - Skip npm install when dependencies unchanged (hash check) - Rename port-manager.ts to config.ts with browser config - Let browser use default profile unless userDataDir explicitly set - Simplify SKILL.md documentation with single startup flow --- .gitignore | 1 + skills/dev-browser/SKILL.md | 66 ++++----- .../dev-browser/scripts/get-browser-config.ts | 37 +++++ .../scripts/start-external-browser.ts | 5 +- skills/dev-browser/server.sh | 75 +++++++++- .../src/{port-manager.ts => config.ts} | 132 +++++++++++++++++- skills/dev-browser/src/external-browser.ts | 16 ++- skills/dev-browser/src/index.ts | 8 +- 8 files changed, 282 insertions(+), 58 deletions(-) create mode 100644 skills/dev-browser/scripts/get-browser-config.ts rename skills/dev-browser/src/{port-manager.ts => config.ts} (71%) diff --git a/.gitignore b/.gitignore index 1fd9cd3..96c2472 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ coverage/ # Temporary files tmp/ temp/ +.npm-install-hash # Browser profiles profiles/ diff --git a/skills/dev-browser/SKILL.md b/skills/dev-browser/SKILL.md index 4bed812..cbad2f7 100644 --- a/skills/dev-browser/SKILL.md +++ b/skills/dev-browser/SKILL.md @@ -15,60 +15,44 @@ Browser automation that maintains page state across script executions. Write sma ## Setup -Three modes available. Ask the user if unclear which to use. - -### Standalone Mode (Default) - -Launches a new Chromium browser for fresh automation sessions. - ```bash ./skills/dev-browser/server.sh & ``` -Add `--headless` flag if user requests it. **Wait for the `Ready` message before running scripts.** +**Wait for the `Ready` message before running scripts.** -### External Browser Mode +The server auto-detects the best browser mode based on user configuration at `~/.dev-browser/config.json`: -Connects to an external browser (like Chrome for Testing) via Chrome DevTools Protocol (CDP). 
Use this when: +- **External Browser** (default when Chrome for Testing is installed): Uses Chrome for Testing via CDP. Browser stays open after automation. +- **Standalone**: Uses Playwright's built-in Chromium. Use `--standalone` flag to force this mode. -- User wants to use a specific browser build (Chrome for Testing, Chrome Beta, etc.) -- User wants the browser to stay open after automation for manual inspection -- User wants visible browser automation for local development -- No extension installation required - -**Start the server:** - -```bash -cd skills/dev-browser && BROWSER_PATH="/path/to/chrome" npx tsx scripts/start-external-browser.ts & -``` +**Flags:** +- `--standalone` - Force standalone Playwright mode +- `--headless` - Run headless (standalone mode only) -**Environment variables:** -- `PORT` - HTTP API port (default: 9222) -- `CDP_PORT` - Browser's CDP port (default: 9223) -- `BROWSER_PATH` - Path to browser executable (enables auto-launch) -- `USER_DATA_DIR` - Browser profile directory (default: ~/.dev-browser-profile) -- `AUTO_LAUNCH` - Auto-launch browser if not running (default: true) +### Configuration -**Example with Chrome for Testing (macOS):** +Browser settings are configured in `~/.dev-browser/config.json`: -```bash -BROWSER_PATH="/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing" \ -npx tsx scripts/start-external-browser.ts & +```json +{ + "browser": { + "mode": "auto", + "path": "/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing" + } +} ``` -**Or start the browser manually first:** - -```bash -# Start Chrome for Testing with CDP enabled -"/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing" \ - --remote-debugging-port=9223 \ - --user-data-dir=~/.chrome-for-testing-data & - -# Then start the dev-browser server (no BROWSER_PATH needed) -cd skills/dev-browser && npx tsx scripts/start-external-browser.ts & -``` +| Setting | Values | 
Description | +|---------|--------|-------------| +| `browser.mode` | `"auto"` (default), `"external"`, `"standalone"` | `auto` uses Chrome for Testing if found, otherwise Playwright | +| `browser.path` | Path string | Custom browser executable path (auto-detected if not set) | +| `browser.userDataDir` | Path string | Browser profile directory for external mode (uses browser's default if not set) | -**Key difference:** When you stop the dev-browser server, the browser stays open. This is by design—you manage the browser lifecycle, dev-browser just connects to it. +**Auto-detection paths:** +- **macOS**: `/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing` +- **Linux**: `/opt/google/chrome-for-testing/chrome`, `/usr/bin/google-chrome-for-testing` +- **Windows**: `C:\Program Files\Google\Chrome for Testing\Application\chrome.exe` ### Extension Mode diff --git a/skills/dev-browser/scripts/get-browser-config.ts b/skills/dev-browser/scripts/get-browser-config.ts new file mode 100644 index 0000000..127d2eb --- /dev/null +++ b/skills/dev-browser/scripts/get-browser-config.ts @@ -0,0 +1,37 @@ +/** + * Output resolved browser configuration for shell scripts. + * + * Usage: npx tsx scripts/get-browser-config.ts + * + * Output format (shell-eval compatible): + * BROWSER_MODE="external" + * BROWSER_PATH="/path/to/chrome" + * BROWSER_USER_DATA_DIR="/path/to/profile" + */ + +import { getResolvedBrowserConfig } from "@/config.js"; + +/** + * Shell-escape a string value for safe eval. 
+ */ +function shellEscape(value: string): string { + // Use double quotes and escape special characters + return `"${value.replace(/"/g, '\\"')}"`; +} + +try { + const config = getResolvedBrowserConfig(); + + // Output in shell-eval format with proper quoting + console.log(`BROWSER_MODE=${shellEscape(config.mode)}`); + console.log(`BROWSER_PATH=${shellEscape(config.path || "")}`); + // Only output userDataDir if explicitly configured + console.log(`BROWSER_USER_DATA_DIR=${shellEscape(config.userDataDir || "")}`); +} catch (err) { + // On error, output standalone mode as fallback + console.error(`Warning: ${err instanceof Error ? err.message : err}`); + console.log(`BROWSER_MODE="standalone"`); + console.log(`BROWSER_PATH=""`); + console.log(`BROWSER_USER_DATA_DIR=""`); + process.exit(0); // Don't fail - standalone is a valid fallback +} diff --git a/skills/dev-browser/scripts/start-external-browser.ts b/skills/dev-browser/scripts/start-external-browser.ts index 18484a6..1d41e7c 100644 --- a/skills/dev-browser/scripts/start-external-browser.ts +++ b/skills/dev-browser/scripts/start-external-browser.ts @@ -51,7 +51,8 @@ mkdirSync(tmpDir, { recursive: true }); const port = process.env.PORT ? parseInt(process.env.PORT, 10) : undefined; const cdpPort = process.env.CDP_PORT ? parseInt(process.env.CDP_PORT, 10) : undefined; const browserPath = process.env.BROWSER_PATH; -const userDataDir = process.env.USER_DATA_DIR || `${process.env.HOME}/.dev-browser-profile`; +// Only pass userDataDir if explicitly set - let browser use default profile otherwise +const userDataDir = process.env.USER_DATA_DIR || undefined; const autoLaunch = process.env.AUTO_LAUNCH !== "false"; console.log("Starting dev-browser with external browser mode..."); @@ -60,7 +61,7 @@ console.log(` CDP port: ${cdpPort ?? 
"from config (default: 9223)"}`); if (browserPath) { console.log(` Browser path: ${browserPath}`); } -console.log(` User data dir: ${userDataDir}`); +console.log(` User data dir: ${userDataDir ?? "(default profile)"}`); console.log(` Auto-launch: ${autoLaunch}`); console.log(` Config: ~/.dev-browser/config.json`); console.log(""); diff --git a/skills/dev-browser/server.sh b/skills/dev-browser/server.sh index 50369a4..fc1604c 100755 --- a/skills/dev-browser/server.sh +++ b/skills/dev-browser/server.sh @@ -8,17 +8,82 @@ cd "$SCRIPT_DIR" # Parse command line arguments HEADLESS=false +FORCE_STANDALONE=false while [[ "$#" -gt 0 ]]; do case $1 in --headless) HEADLESS=true ;; + --standalone) FORCE_STANDALONE=true ;; *) echo "Unknown parameter: $1"; exit 1 ;; esac shift done -echo "Installing dependencies..." -npm install +# Conditional npm install - only if node_modules missing or package-lock changed +NEEDS_INSTALL=false +HASH_FILE="$SCRIPT_DIR/.npm-install-hash" -echo "Starting dev-browser server..." -export HEADLESS=$HEADLESS -npx tsx scripts/start-server.ts +if [ ! -d "$SCRIPT_DIR/node_modules" ]; then + NEEDS_INSTALL=true +elif [ -f "$SCRIPT_DIR/package-lock.json" ]; then + CURRENT_HASH=$(shasum "$SCRIPT_DIR/package-lock.json" 2>/dev/null | cut -d' ' -f1) + SAVED_HASH=$(cat "$HASH_FILE" 2>/dev/null || echo "") + if [ "$CURRENT_HASH" != "$SAVED_HASH" ]; then + NEEDS_INSTALL=true + fi +fi + +if [ "$NEEDS_INSTALL" = true ]; then + echo "Installing dependencies..." 
+ npm install --prefer-offline --no-audit --no-fund + # Save hash for next time + if [ -f "$SCRIPT_DIR/package-lock.json" ]; then + shasum "$SCRIPT_DIR/package-lock.json" | cut -d' ' -f1 > "$HASH_FILE" + fi +else + echo "Dependencies up to date (skipping npm install)" +fi + +# Get browser configuration from config file +# Config is at ~/.dev-browser/config.json +if [ "$FORCE_STANDALONE" = true ]; then + BROWSER_MODE="standalone" + BROWSER_PATH="" +else + # Read config using TypeScript helper + CONFIG_OUTPUT=$(npx tsx scripts/get-browser-config.ts 2>/dev/null) + if [ $? -eq 0 ]; then + eval "$CONFIG_OUTPUT" + else + # Fallback to standalone if config read fails + BROWSER_MODE="standalone" + BROWSER_PATH="" + fi +fi + +# Start the appropriate server mode +if [ "$BROWSER_MODE" = "external" ] && [ -n "$BROWSER_PATH" ]; then + echo "Starting dev-browser server (External Browser mode)..." + echo " Browser: $BROWSER_PATH" + echo " Config: ~/.dev-browser/config.json" + echo " Use --standalone flag to force standalone Playwright mode" + echo "" + + export BROWSER_PATH + # Only export USER_DATA_DIR if explicitly configured (not empty) + if [ -n "$BROWSER_USER_DATA_DIR" ]; then + export USER_DATA_DIR="$BROWSER_USER_DATA_DIR" + fi + npx tsx scripts/start-external-browser.ts +else + echo "Starting dev-browser server (Standalone mode)..." 
+ if [ "$FORCE_STANDALONE" = true ]; then + echo " Standalone mode forced via --standalone flag" + elif [ -z "$BROWSER_PATH" ]; then + echo " Chrome for Testing not found - using Playwright Chromium" + echo " Configure browser.path in ~/.dev-browser/config.json" + fi + echo "" + + export HEADLESS=$HEADLESS + npx tsx scripts/start-server.ts +fi diff --git a/skills/dev-browser/src/port-manager.ts b/skills/dev-browser/src/config.ts similarity index 71% rename from skills/dev-browser/src/port-manager.ts rename to skills/dev-browser/src/config.ts index 804af3e..f9237df 100644 --- a/skills/dev-browser/src/port-manager.ts +++ b/skills/dev-browser/src/config.ts @@ -20,6 +20,40 @@ import { execSync } from "child_process"; import { mkdirSync, existsSync, readFileSync, writeFileSync } from "fs"; import { join } from "path"; +/** + * Browser mode selection. + * - "auto": Detect Chrome for Testing, fall back to standalone (default) + * - "external": Always use external browser via CDP (fail if not found) + * - "standalone": Always use Playwright's built-in Chromium + */ +export type BrowserMode = "auto" | "external" | "standalone"; + +/** + * Browser configuration for dev-browser. + */ +export interface BrowserConfig { + /** + * Browser mode selection (default: "auto") + * - "auto": Detect Chrome for Testing, fall back to standalone + * - "external": Always use external browser via CDP + * - "standalone": Always use Playwright's built-in Chromium + */ + mode: BrowserMode; + /** + * Path to browser executable for external mode. + * If not set, uses platform-specific defaults: + * - macOS: /Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing + * - Linux: /opt/google/chrome-for-testing/chrome, /usr/bin/google-chrome-for-testing, or /usr/local/bin/chrome-for-testing + * - Windows: C:\Program Files\Google\Chrome for Testing\Application\chrome.exe + */ + path?: string; + /** + * User data directory for browser profile. 
+ * Default: not set (the browser uses its own default profile) + */ + userDataDir?: string; +} + /** * Configuration for dev-browser multi-agent support. */ @@ -38,6 +72,8 @@ export interface DevBrowserConfig { }; /** CDP port for external browser mode (default: 9223) */ cdpPort: number; + /** Browser configuration */ + browser: BrowserConfig; } /** @@ -60,6 +96,41 @@ const CONFIG_DIR = join(process.env.HOME || "", ".dev-browser"); const CONFIG_FILE = join(CONFIG_DIR, "config.json"); const SERVERS_FILE = join(CONFIG_DIR, "active-servers.json"); +/** + * Get platform-specific default browser path for Chrome for Testing. + */ +function getDefaultBrowserPath(): string | undefined { + const platform = process.platform; + + if (platform === "darwin") { + // macOS: Check standard installation path + const macPath = "/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing"; + if (existsSync(macPath)) { + return macPath; + } + } else if (platform === "linux") { + // Linux: Check common installation paths + const linuxPaths = [ + "/opt/google/chrome-for-testing/chrome", + "/usr/bin/google-chrome-for-testing", + "/usr/local/bin/chrome-for-testing", + ]; + for (const path of linuxPaths) { + if (existsSync(path)) { + return path; + } + } + } else if (platform === "win32") { + // Windows: Check standard installation path + const winPath = "C:\\Program Files\\Google\\Chrome for Testing\\Application\\chrome.exe"; + if (existsSync(winPath)) { + return winPath; + } + } + + return undefined; +} + /** * Default configuration values. */ @@ -70,29 +141,86 @@ const DEFAULT_CONFIG: DevBrowserConfig = { step: 2, // Skip odd ports to avoid CDP port collision }, cdpPort: 9223, + browser: { + mode: "auto", + // userDataDir intentionally not set - let browser use its default profile + // unless user explicitly configures it in ~/.dev-browser/config.json + }, }; /** * Load configuration from ~/.dev-browser/config.json with defaults. 
+ * Merges user config with defaults and resolves platform-specific browser paths. */ export function loadConfig(): DevBrowserConfig { + let config = { ...DEFAULT_CONFIG }; + try { if (existsSync(CONFIG_FILE)) { const content = readFileSync(CONFIG_FILE, "utf-8"); const userConfig = JSON.parse(content); - return { + config = { ...DEFAULT_CONFIG, ...userConfig, portRange: { ...DEFAULT_CONFIG.portRange, ...(userConfig.portRange || {}), }, + browser: { + ...DEFAULT_CONFIG.browser, + ...(userConfig.browser || {}), + }, }; } } catch (err) { console.warn(`Warning: Could not load config from ${CONFIG_FILE}:`, err); } - return DEFAULT_CONFIG; + + // Resolve browser path: user config > auto-detection > undefined + if (!config.browser.path) { + config.browser.path = getDefaultBrowserPath(); + } + + return config; +} + +/** + * Get resolved browser configuration for use by server scripts. + * Returns the effective browser mode and path based on config and detection. + */ +export function getResolvedBrowserConfig(): { + mode: "external" | "standalone"; + path?: string; + userDataDir?: string; +} { + const config = loadConfig(); + const { browser } = config; + + // Determine effective mode + let effectiveMode: "external" | "standalone"; + + if (browser.mode === "standalone") { + effectiveMode = "standalone"; + } else if (browser.mode === "external") { + if (!browser.path) { + throw new Error( + `Browser mode is "external" but no browser path configured or detected. ` + + `Set browser.path in ~/.dev-browser/config.json or install Chrome for Testing.` + ); + } + effectiveMode = "external"; + } else { + // "auto" mode: use external if browser found, otherwise standalone + effectiveMode = browser.path ? 
"external" : "standalone"; + } + + return { + mode: effectiveMode, + path: browser.path, + // Only include userDataDir if explicitly configured by user + // For external mode, let the browser use its default profile unless specified + userDataDir: browser.userDataDir, + }; } /** diff --git a/skills/dev-browser/src/external-browser.ts b/skills/dev-browser/src/external-browser.ts index 6254078..9da775f 100644 --- a/skills/dev-browser/src/external-browser.ts +++ b/skills/dev-browser/src/external-browser.ts @@ -14,7 +14,7 @@ import { registerServer, unregisterServer, outputPortForDiscovery, -} from "./port-manager.js"; +} from "./config.js"; export interface ExternalBrowserOptions { /** @@ -81,18 +81,23 @@ async function getCdpEndpoint(cdpPort: number, maxRetries = 60): Promise function launchBrowserDetached( browserPath: string, cdpPort: number, - userDataDir: string + userDataDir?: string ): void { const args = [ `--remote-debugging-port=${cdpPort}`, - `--user-data-dir=${userDataDir}`, "--no-first-run", "--no-default-browser-check", ]; + // Only add user-data-dir if explicitly configured + // This lets the browser use its default profile when not specified + if (userDataDir) { + args.push(`--user-data-dir=${userDataDir}`); + } + console.log(`Launching browser: ${browserPath}`); console.log(` CDP port: ${cdpPort}`); - console.log(` User data: ${userDataDir}`); + console.log(` User data: ${userDataDir ?? "(default profile)"}`); const child = spawn(browserPath, args, { detached: true, @@ -133,7 +138,8 @@ export async function serveWithExternalBrowser( const cdpPort = options.cdpPort ?? config.cdpPort; const autoLaunch = options.autoLaunch ?? true; const browserPath = options.browserPath; - const userDataDir = options.userDataDir ?? 
`${process.env.HOME}/.dev-browser-profile`; + // Only use userDataDir if explicitly provided - let browser use default profile otherwise + const userDataDir = options.userDataDir; // Validate port numbers if (port < 1 || port > 65535) { diff --git a/skills/dev-browser/src/index.ts b/skills/dev-browser/src/index.ts index 85b5f46..3d74ba2 100644 --- a/skills/dev-browser/src/index.ts +++ b/skills/dev-browser/src/index.ts @@ -17,7 +17,7 @@ import { unregisterServer, outputPortForDiscovery, cleanupOrphanedBrowsers, -} from "./port-manager.js"; +} from "./config.js"; export type { ServeOptions, GetPageResponse, ListPagesResponse, ServerInfoResponse }; @@ -28,16 +28,18 @@ export { type ExternalBrowserServer, } from "./external-browser.js"; -// Re-export port management utilities +// Re-export configuration utilities export { loadConfig, findAvailablePort, cleanupOrphanedBrowsers, detectOrphanedBrowsers, type DevBrowserConfig, + type BrowserConfig, + type BrowserMode, type ServerInfo, type OrphanedBrowser, -} from "./port-manager.js"; +} from "./config.js"; export interface DevBrowserServer { wsEndpoint: string;