Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions PATCH_NOTES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Voicebox Offline Mode Fix

## Problem
Voicebox crashes when generating speech if HuggingFace is unreachable, even when models are fully cached locally.

**Root Cause:**
- Voicebox downloads `mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16` (MLX optimized version)
- But `mlx_audio.tts.load()` tries to fetch `config.json` from original repo `Qwen/Qwen3-TTS-12Hz-1.7B-Base`
- This network request fails → server crashes with `RemoteDisconnected`

**Related Issues:**
- Issue #150: "Internet connection required, even though models are downloaded?"
- Issue #151: "API Stability Issues: Model Loading Hangs and Server Crashes"

## Solution
Two-part fix:

### 1. Monkey-patch huggingface_hub (`backend/utils/hf_offline_patch.py`)
- Intercepts cache lookup functions
- Applied early (before mlx_audio imports) so mlx_audio resolves the patched lookup
- Adds debug logging for cache hits/misses

### 2. Symlink original repo to MLX version (`ensure_original_qwen_config_cached()`)
- When original `Qwen/Qwen3-TTS-12Hz-1.7B-Base` cache doesn't exist
- But MLX `mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16` does exist
- Creates a symlink so cache lookups succeed

## Files Changed
- `backend/backends/mlx_backend.py` - Added patch imports at top
- `backend/utils/hf_offline_patch.py` - New patch module

## Testing
To test this fix:
1. Build Voicebox from source: `make build`
2. Disconnect from internet
3. Try generating speech
4. Should work without network requests

## Build Instructions

```bash
# Install dependencies
pip install -r requirements.txt

# Build the app
make build

# Or build just the server
make build-server
```

## Notes
- The patch is applied automatically when `mlx_backend.py` is imported
- Set `VOICEBOX_OFFLINE_PATCH=0` to disable the patch
- The symlink approach works because the config.json is compatible between versions

---
*Patch contributed by community*
27 changes: 27 additions & 0 deletions backend/backends/mlx_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,15 @@
from typing import Optional, List, Tuple
import asyncio
import numpy as np
import os
from pathlib import Path

# PATCH: Import and apply offline patch BEFORE any huggingface_hub usage
# This prevents mlx_audio from making network requests when models are cached
from ..utils.hf_offline_patch import patch_huggingface_hub_offline, ensure_original_qwen_config_cached
patch_huggingface_hub_offline()
ensure_original_qwen_config_cached()

from . import TTSBackend, STTBackend
from ..utils.cache import get_cache_key, get_cached_voice_prompt, cache_voice_prompt
from ..utils.audio import normalize_audio, load_audio
Expand Down Expand Up @@ -159,15 +166,35 @@ def _load_model_sync(self, model_size: str):
tracker_context = tracker.patch_download()
tracker_context.__enter__()

# PATCH: Force offline mode when model is already cached
# This prevents crashes when HuggingFace is unreachable
original_hf_hub_offline = os.environ.get("HF_HUB_OFFLINE")
if is_cached:
os.environ["HF_HUB_OFFLINE"] = "1"
print(f"[PATCH] Model {model_size} is cached, forcing HF_HUB_OFFLINE=1 to avoid network requests")

# Import mlx_audio AFTER patching tqdm
from mlx_audio.tts import load

# Load MLX model (downloads automatically)
try:
self.model = load(model_path)
except Exception as load_error:
# If offline mode failed, try with network enabled as fallback
if is_cached and "offline" in str(load_error).lower():
print(f"[PATCH] Offline load failed, trying with network: {load_error}")
os.environ.pop("HF_HUB_OFFLINE", None)
self.model = load(model_path)
else:
raise
finally:
# Exit the patch context
tracker_context.__exit__(None, None, None)
# Restore original HF_HUB_OFFLINE setting
if original_hf_hub_offline is not None:
os.environ["HF_HUB_OFFLINE"] = original_hf_hub_offline
else:
os.environ.pop("HF_HUB_OFFLINE", None)

# Only mark download as complete if we were tracking it
if not is_cached:
Expand Down
100 changes: 100 additions & 0 deletions backend/utils/hf_offline_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""
Monkey patch for huggingface_hub to force offline mode with cached models.
This prevents mlx_audio from making network requests when models are already downloaded.
"""

import os
from pathlib import Path
from typing import Optional, Union


def patch_huggingface_hub_offline():
    """Wrap huggingface_hub's cache-lookup helper with diagnostic logging.

    Replaces ``huggingface_hub.file_download._try_to_load_from_cache`` with a
    wrapper that logs every cache hit/miss (including the expected on-disk
    location on a miss), so offline failures can be diagnosed.  The wrapper
    delegates to the original implementation unchanged; actual offline
    enforcement is handled separately via the ``HF_HUB_OFFLINE`` env var.

    Must be called BEFORE importing mlx_audio so that mlx_audio resolves the
    patched function.  Best-effort: logs and returns normally when
    huggingface_hub is not installed or the patch cannot be applied.
    """
    try:
        from huggingface_hub import constants as hf_constants
        import huggingface_hub.file_download as fd

        # Keep a reference to the unpatched implementation for delegation.
        original_try_load = fd._try_to_load_from_cache

        def _patched_try_to_load_from_cache(
            repo_id: str,
            filename: str,
            cache_dir: Union[str, Path, None] = None,
            revision: Optional[str] = None,
            repo_type: Optional[str] = None,
        ):
            """Delegate to the original lookup and log the outcome.

            Returns whatever the original returns: the cached file path on a
            hit, or None when the file is not in the local cache.
            """
            result = original_try_load(
                repo_id=repo_id,
                filename=filename,
                cache_dir=cache_dir,
                revision=revision,
                repo_type=repo_type,
            )

            if result is None:
                # Cache miss - log the repo directory where the file was
                # expected, to make offline debugging straightforward.
                cache_path = Path(hf_constants.HF_HUB_CACHE) / f"models--{repo_id.replace('/', '--')}"
                # BUGFIX: log the actual filename instead of the literal "(unknown)".
                print(f"[HF_PATCH] File not cached: {repo_id}/{filename}")
                print(f"[HF_PATCH] Expected at: {cache_path}")
            else:
                print(f"[HF_PATCH] Cache hit: {repo_id}/{filename}")

            return result

        # Replace the module-level attribute so callers that look the
        # function up via the module (e.g. mlx_audio imported later) get
        # the logging wrapper.
        fd._try_to_load_from_cache = _patched_try_to_load_from_cache

        print("[HF_PATCH] huggingface_hub patched for offline mode")

    except ImportError:
        print("[HF_PATCH] huggingface_hub not found, skipping patch")
    except Exception as e:
        # Never let a diagnostics patch break startup.
        print(f"[HF_PATCH] Error patching huggingface_hub: {e}")


def ensure_original_qwen_config_cached():
    """Alias the original Qwen repo cache dir to the MLX community cache.

    The MLX community model is converted from the original Qwen model, and
    mlx_audio may try to resolve config files against the original repo id.
    When only the MLX repo is cached locally, create a cache-directory
    symlink named after the original repo so those lookups succeed offline.

    Best-effort: all failure modes (huggingface_hub missing, symlinks
    unsupported on the filesystem, permission errors) are logged and
    swallowed so importing this module can never crash.
    """
    try:
        from huggingface_hub import constants as hf_constants
    except ImportError:
        # Mirror patch_huggingface_hub_offline(): without huggingface_hub
        # there is no cache to alias, so just log and bail out.
        print("[HF_PATCH] huggingface_hub not found, skipping symlink setup")
        return

    # Original Qwen model that mlx_audio might reference, and the MLX
    # conversion that Voicebox actually downloads.
    original_repo = "Qwen/Qwen3-TTS-12Hz-1.7B-Base"
    mlx_repo = "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16"

    cache_dir = Path(hf_constants.HF_HUB_CACHE)

    # HF hub cache layout: <cache>/models--{org}--{name}
    original_path = cache_dir / f"models--{original_repo.replace('/', '--')}"
    mlx_path = cache_dir / f"models--{mlx_repo.replace('/', '--')}"

    # Only act when the original repo is absent but the MLX one is present.
    if not original_path.exists() and mlx_path.exists():
        print(f"[HF_PATCH] Original repo not cached, but MLX version is")
        print(f"[HF_PATCH] Creating symlink from {original_repo} -> {mlx_repo}")

        try:
            # Create a symlink so cache lookups against the original repo id
            # resolve to the MLX repo's files.
            original_path.parent.mkdir(parents=True, exist_ok=True)
            original_path.symlink_to(mlx_path, target_is_directory=True)
            print(f"[HF_PATCH] Symlink created successfully")
        except Exception as e:
            # e.g. FileExistsError (raced), OSError (no symlink support).
            print(f"[HF_PATCH] Could not create symlink: {e}")


# Apply the patch automatically on import; opt out with VOICEBOX_OFFLINE_PATCH=0.
_patch_setting = os.environ.get("VOICEBOX_OFFLINE_PATCH", "1")
if _patch_setting != "0":
    patch_huggingface_hub_offline()
    ensure_original_qwen_config_cached()