jamiepine · rgr4y · Feb 3, 2026 · Feb 13, 2026 · Feb 14, 2026 · Feb 14, 2026
diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md
@@ -0,0 +1,25 @@
+# Voicebox Project Notes
+
+## CLI — voicebox-cli vs cli.py
+
+**`voicebox/voicebox-cli`** is the real CLI. It is stdlib-only (no pip deps), self-contained, and is what users actually run. It has all commands: `server`, `voices`, `import`, `generate`/`say`, `health`, `config`, `transcribe`, `create-voice`. Config persists to `~/.config/voicebox/config.json`.
+
+**`voicebox/backend/cli.py`** is dead code. It predates `voicebox-cli` and was superseded. Its only live reference is the launcher line in `setup-linux.sh`, which is intentionally left as-is. **Do not modify cli.py.**
+
+When the user asks for CLI changes, always work on `voicebox-cli`.
+
+## Key Architecture
+
+- **Backend**: FastAPI (`backend/main.py`) served by uvicorn on port 17493
+- **Entry points**: `server.py` (PyInstaller binary), `backend/main.py __main__` (dev)
+- **Dev script**: `scripts/dev-backend-watch.sh` — loads `.env` from `voicebox/` and `../` then runs uvicorn with `--reload`
+- **MLX backend**: `backend/backends/mlx_backend.py` — Apple Silicon only, uses mlx-audio. Models: `mlx-community/Qwen3-TTS-12Hz-{1.7B,0.6B}-Base-4bit`. Uses `Base` variants (not `CustomVoice` — those require a named speaker, not ref_audio).
+- **PyTorch backend**: `backend/backends/pytorch_backend.py` — CUDA/CPU, uses qwen-tts
+- **Logging**: stdlib `logging`. Set `LOG_LEVEL=DEBUG` env var for verbose output.
+
+## MLX Gotchas
+
+- `transformers` verbosity is suppressed at module-level import in `mlx_backend.py` — do not restore or move this
+- Concurrent MLX loads crash Metal (`commit an already committed command buffer`) — serialized via `_MLX_LOAD_LOCK` threading lock in `load_model_async`
+- `CustomVoice` model variants require a named speaker arg; `Base` variants support arbitrary voice cloning via `ref_audio`/`ref_text`
+- On 16GB unified memory, bf16 models cause swap pressure — use 4-bit quantized variants
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,27 @@
+data/
+backend/venv/
+node_modules/
+__pycache__/
+*.pyc
+*.egg-info/
+.claude/
+.git/
+.github/
+.vscode/
+*.md
+docs/
+mlx-test/
+scripts/
+tauri/
+web/
+landing/
+.DS_Store
+*.log
+*.cache
+dist/
+build/
+.env
+.env.*
+*.swp
+*.swo
+*~
diff --git a/.githooks/pre-commit b/.githooks/pre-commit
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Pre-commit hook: lint staged files
+# Install: git config core.hooksPath .githooks
+set -euo pipefail
+
+REPO_ROOT="$(git rev-parse --show-toplevel)"
+STAGED=$(git diff --cached --name-only --diff-filter=ACMR)
+
+if [[ -z "$STAGED" ]]; then
+    exit 0
+fi
+
+FAILED=0
+
+# ── Python (ruff) ─────────────────────────────────────────────────────────────
+PY_FILES=$(echo "$STAGED" | grep '\.py$' || true)
+if [[ -n "$PY_FILES" ]]; then
+    RUFF="$REPO_ROOT/backend/venv/bin/ruff"
+    if [[ -x "$RUFF" ]]; then
+        echo "→ ruff: checking Python files..."
+        if ! echo "$PY_FILES" | xargs "$RUFF" check --quiet; then
+            echo "  ruff found issues. Run: backend/venv/bin/ruff check --fix <file>"
+            FAILED=1
+        fi
+    else
+        echo "  (ruff not found in backend/venv — skipping Python lint)"
+    fi
+fi
+
+# ── JS/TS (biome) ─────────────────────────────────────────────────────────────
+JS_FILES=$(echo "$STAGED" | grep -E '\.(js|jsx|ts|tsx|json)$' | grep -v 'node_modules' || true)
+if [[ -n "$JS_FILES" ]]; then
+    BIOME=$(command -v biome 2>/dev/null \
+        || ls "$REPO_ROOT"/node_modules/.bin/biome 2>/dev/null \
+        || ls "$REPO_ROOT"/app/node_modules/.bin/biome 2>/dev/null \
+        || true)
+    if [[ -x "$BIOME" ]]; then
+        echo "→ biome: checking JS/TS files..."
+        if ! echo "$JS_FILES" | xargs "$BIOME" check --no-errors-on-unmatched; then
+            echo "  biome found issues. Run: biome check --write <file>"
+            FAILED=1
+        fi
+    else
+        echo "  (biome not found — skipping JS/TS lint)"
+    fi
+fi
+
+exit $FAILED
diff --git a/.gitignore b/.gitignore
@@ -39,6 +39,8 @@ data/profiles/*
 data/generations/*
 data/projects/*
 data/voicebox.db
+data/huggingface
+data/model_prefs.json
 !data/.gitkeep
 
 # Logs
@@ -57,3 +59,7 @@ tauri/src-tauri/binaries/*
 tmp/
 temp/
 *.tmp
+output*.m4a
+package-lock.json
+.claude
+tauri/src-tauri/gen/Assets.car
diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.12.12
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,88 @@
+# syntax=docker/dockerfile:1.4
+# (The syntax directive above must stay on the very first line; after any other comment BuildKit treats it as plain text.)
+# Voicebox TTS Server
+# CUDA 12.9 + Python 3.12 on Ubuntu 24.04
+#
+# Build:
+#   DOCKER_BUILDKIT=1 docker build -t voicebox .
+#   DOCKER_BUILDKIT=1 docker build --build-arg CUDA=0 -t voicebox-cpu .
+#   DOCKER_BUILDKIT=1 docker build --build-arg SERVERLESS=1 -t voicebox-serverless .
+#
+# Run:
+#   docker compose up -d
+
+ARG CUDA=1
+ARG SERVERLESS=0
+
+# --- Base stage ---
+FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS base-cuda
+FROM ubuntu:24.04 AS base-cpu
+
+# --- Pick base based on CUDA arg ---
+FROM base-cuda AS base-1
+FROM base-cpu AS base-0
+FROM base-${CUDA} AS base
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    apt-get update && apt-get install -y --no-install-recommends \
+    python3 \
+    python3-venv \
+    python3-dev \
+    python3-pip \
+    libsndfile1 \
+    ffmpeg \
+    curl \
+    sox \
+    && rm -rf /var/lib/apt/lists/*
+
+# --- Dependencies stage (cached layer) ---
+FROM base AS deps
+
+ARG CUDA
+WORKDIR /app
+
+# Create virtual environment outside /app to survive volume mount
+RUN python3 -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+COPY backend/requirements.txt ./requirements.txt
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install --upgrade pip && \
+    if [ "$CUDA" = "1" ]; then \
+        pip install torch torchaudio torchvision --index-url https://download.pytorch.org/whl/cu124 && \
+        pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu124; \
+    else \
+        pip install torch torchaudio torchvision --index-url https://download.pytorch.org/whl/cpu && \
+        pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu; \
+    fi
+
+# Source is volume-mounted at runtime (local dev) or COPYed below (serverless)
+ENV HF_HOME=/app/data/huggingface
+ENV PATH="/opt/venv/bin:$PATH"
+
+# Copy source into image for non-volume-mount deployments (e.g. RunPod)
+COPY backend/ /app/backend/
+
+# --- Normal mode: FastAPI server on port 17493 ---
+FROM deps AS final-0
+EXPOSE 17493
+HEALTHCHECK --interval=60s --timeout=5s --start-period=30s --retries=3 \
+    CMD curl -f http://localhost:17493/health || exit 1
+ENTRYPOINT ["/opt/venv/bin/python3", "-m", "backend.main"]
+CMD ["--host", "0.0.0.0", "--port", "17493", "--data-dir", "/app/data"]
+
+# --- Serverless mode: RunPod handler ---
+FROM deps AS final-1
+ENV SERVERLESS=1
+HEALTHCHECK NONE
+ENTRYPOINT ["/opt/venv/bin/python3", "-u", "-m", "backend.serverless_handler"]
+CMD []
+
+# --- Pick final stage based on SERVERLESS arg ---
+ARG SERVERLESS
+FROM final-${SERVERLESS} AS final
diff --git a/Makefile b/Makefile
@@ -41,19 +41,29 @@ setup: setup-js setup-python ## Full project setup (all dependencies)
 	@echo -e "  Run $(YELLOW)make dev$(NC) to start development servers"
 
 setup-js: ## Install JavaScript dependencies (bun)
+	@command -v bun >/dev/null 2>&1 || { \
+		echo -e "$(YELLOW)bun not found — installing...$(NC)"; \
+		curl -fsSL https://bun.sh/install | bash; \
+	}
 	@echo -e "$(BLUE)Installing JavaScript dependencies...$(NC)"
-	bun install
+	export PATH="$$PATH:$${BUN_INSTALL:-$$HOME/.bun}/bin" && bun install
 
 setup-python: $(VENV)/bin/activate ## Set up Python virtual environment and dependencies
 	@echo -e "$(BLUE)Installing Python dependencies...$(NC)"
 	$(PIP) install --upgrade pip
-	$(PIP) install -r $(BACKEND_DIR)/requirements.txt
-	@if [ "$$(uname -m)" = "arm64" ] && [ "$$(uname)" = "Darwin" ]; then \
-		echo -e "$(BLUE)Detected Apple Silicon - installing MLX dependencies...$(NC)"; \
+	@set -e; if [ "$$(uname -m)" = "arm64" ] && [ "$$(uname)" = "Darwin" ]; then \
+		echo -e "$(BLUE)Detected Apple Silicon - using MLX-compatible dependency resolution...$(NC)"; \
 		$(PIP) install -r $(BACKEND_DIR)/requirements-mlx.txt; \
+		filtered_reqs="$$(mktemp "$${TMPDIR:-/tmp}/voicebox-requirements.XXXXXX")"; trap 'rm -f "$$filtered_reqs"' EXIT; \
+		grep -v -E "^transformers" $(BACKEND_DIR)/requirements.txt > "$$filtered_reqs"; \
+		$(PIP) install -r "$$filtered_reqs"; \
+		$(PIP) install --no-deps git+https://github.com/QwenLM/Qwen3-TTS.git; \
 		echo -e "$(GREEN)✓ MLX backend enabled (native Metal acceleration)$(NC)"; \
+		echo -e "$(YELLOW)Note: Using transformers 5.0.0rc3 (required by MLX)$(NC)"; \
+	else \
+		$(PIP) install -r $(BACKEND_DIR)/requirements.txt; \
+		$(PIP) install git+https://github.com/QwenLM/Qwen3-TTS.git; \
 	fi
-	$(PIP) install git+https://github.com/QwenLM/Qwen3-TTS.git
 	@echo -e "$(GREEN)✓ Python environment ready$(NC)"
 
 $(VENV)/bin/activate:
@@ -72,7 +82,7 @@ setup-rust: ## Install Rust toolchain (if not present)
 # DEVELOPMENT
 # =============================================================================
 
-.PHONY: dev dev-backend dev-frontend dev-web kill-dev
+.PHONY: dev dev-backend dev-backend-watch dev-frontend dev-web kill-dev
 
 dev: ## Start backend + desktop app (parallel)
 	@echo -e "$(BLUE)Starting development servers...$(NC)"
@@ -82,9 +92,11 @@ dev: ## Start backend + desktop app (parallel)
 		sleep 2 && $(MAKE) dev-frontend & \
 		wait
 
-dev-backend: ## Start FastAPI backend server
+dev-backend: dev-backend-watch ## Start FastAPI backend server (venv-verified, auto-reload)
+# dev-backend has no recipe of its own: it only depends on dev-backend-watch, which hands off to scripts/dev-backend-watch.sh.
+dev-backend-watch: ## Start backend with venv verification + Python file watching
 	@echo -e "$(BLUE)Starting backend server on http://localhost:17493$(NC)"
-	$(VENV_BIN)/uvicorn backend.main:app --reload --port 17493
+	./scripts/dev-backend-watch.sh
 
 dev-frontend: ## Start Tauri desktop app
 	@echo -e "$(BLUE)Starting Tauri desktop app...$(NC)"