Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions PATCH_NOTES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Voicebox Offline Mode Fix

## Problem
Voicebox crashes when generating speech if HuggingFace is unreachable, even when models are fully cached locally.

**Root Cause:**
- Voicebox downloads `mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16` (MLX optimized version)
- But `mlx_audio.tts.load()` tries to fetch `config.json` from original repo `Qwen/Qwen3-TTS-12Hz-1.7B-Base`
- This network request fails → server crashes with `RemoteDisconnected`

**Related Issues:**
- Issue #150: "Internet connection required, even though models are downloaded?"
- Issue #151: "API Stability Issues: Model Loading Hangs and Server Crashes"

## Solution
Two-part fix:

### 1. Monkey-patch huggingface_hub (`backend/utils/hf_offline_patch.py`)
- Intercepts cache lookup functions
- Applied early (before mlx_audio imports) so mlx_audio resolves the patched lookup
- Adds debug logging for cache hits/misses

### 2. Symlink original repo to MLX version (`ensure_original_qwen_config_cached()`)
- When original `Qwen/Qwen3-TTS-12Hz-1.7B-Base` cache doesn't exist
- But MLX `mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16` does exist
- Creates a symlink so cache lookups succeed

## Files Changed
- `backend/backends/mlx_backend.py` - Added patch imports at top
- `backend/utils/hf_offline_patch.py` - New patch module

## Testing
To test this fix:
1. Build Voicebox from source: `make build`
2. Disconnect from internet
3. Try generating speech
4. Should work without network requests

## Build Instructions

```bash
# Install dependencies
pip install -r requirements.txt

# Build the app
make build

# Or build just the server
make build-server
```

## Notes
- The patch is applied automatically when `mlx_backend.py` is imported
- Set `VOICEBOX_OFFLINE_PATCH=0` to disable the patch
- The symlink approach works because the config.json is compatible between versions

---
*Patch contributed by community*
27 changes: 27 additions & 0 deletions backend/backends/mlx_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,15 @@
from typing import Optional, List, Tuple
import asyncio
import numpy as np
import os
from pathlib import Path

# PATCH: Import and apply offline patch BEFORE any huggingface_hub usage
# This prevents mlx_audio from making network requests when models are cached
from ..utils.hf_offline_patch import patch_huggingface_hub_offline, ensure_original_qwen_config_cached
patch_huggingface_hub_offline()
ensure_original_qwen_config_cached()

from . import TTSBackend, STTBackend
from ..utils.cache import get_cache_key, get_cached_voice_prompt, cache_voice_prompt
from ..utils.audio import normalize_audio, load_audio
Expand Down Expand Up @@ -159,15 +166,35 @@ def _load_model_sync(self, model_size: str):
tracker_context = tracker.patch_download()
tracker_context.__enter__()

# PATCH: Force offline mode when model is already cached
# This prevents crashes when HuggingFace is unreachable
original_hf_hub_offline = os.environ.get("HF_HUB_OFFLINE")
if is_cached:
os.environ["HF_HUB_OFFLINE"] = "1"
print(f"[PATCH] Model {model_size} is cached, forcing HF_HUB_OFFLINE=1 to avoid network requests")

# Import mlx_audio AFTER patching tqdm
from mlx_audio.tts import load

# Load MLX model (downloads automatically)
try:
self.model = load(model_path)
except Exception as load_error:
# If offline mode failed, try with network enabled as fallback
if is_cached and "offline" in str(load_error).lower():
print(f"[PATCH] Offline load failed, trying with network: {load_error}")
os.environ.pop("HF_HUB_OFFLINE", None)
self.model = load(model_path)
else:
raise
finally:
# Exit the patch context
tracker_context.__exit__(None, None, None)
# Restore original HF_HUB_OFFLINE setting
if original_hf_hub_offline is not None:
os.environ["HF_HUB_OFFLINE"] = original_hf_hub_offline
else:
os.environ.pop("HF_HUB_OFFLINE", None)

# Only mark download as complete if we were tracking it
if not is_cached:
Expand Down
100 changes: 100 additions & 0 deletions backend/utils/hf_offline_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""
Monkey patch for huggingface_hub to force offline mode with cached models.
This prevents mlx_audio from making network requests when models are already downloaded.
"""

import os
from pathlib import Path
from typing import Optional, Union


def patch_huggingface_hub_offline():
    """Wrap huggingface_hub's cache-lookup helper with diagnostic logging.

    Replaces ``huggingface_hub.file_download._try_to_load_from_cache`` with a
    wrapper that logs every cache hit/miss (including the expected on-disk
    location on a miss), so offline failures can be diagnosed.  The wrapper
    delegates to the original implementation unchanged; actual offline
    enforcement is handled separately via the ``HF_HUB_OFFLINE`` env var.

    Must be called BEFORE importing mlx_audio so that mlx_audio resolves the
    patched function.  Best-effort: logs and returns normally when
    huggingface_hub is not installed or the patch cannot be applied.
    """
    try:
        from huggingface_hub import constants as hf_constants
        import huggingface_hub.file_download as fd

        # Keep a reference to the unpatched implementation for delegation.
        original_try_load = fd._try_to_load_from_cache

        def _patched_try_to_load_from_cache(
            repo_id: str,
            filename: str,
            cache_dir: Union[str, Path, None] = None,
            revision: Optional[str] = None,
            repo_type: Optional[str] = None,
        ):
            """Delegate to the original lookup and log the outcome.

            Returns whatever the original returns: the cached file path on a
            hit, or None when the file is not in the local cache.
            """
            result = original_try_load(
                repo_id=repo_id,
                filename=filename,
                cache_dir=cache_dir,
                revision=revision,
                repo_type=repo_type,
            )

            if result is None:
                # Cache miss - log the repo directory where the file was
                # expected, to make offline debugging straightforward.
                cache_path = Path(hf_constants.HF_HUB_CACHE) / f"models--{repo_id.replace('/', '--')}"
                # BUGFIX: log the actual filename instead of the literal "(unknown)".
                print(f"[HF_PATCH] File not cached: {repo_id}/{filename}")
                print(f"[HF_PATCH] Expected at: {cache_path}")
            else:
                print(f"[HF_PATCH] Cache hit: {repo_id}/{filename}")

            return result

        # Replace the module-level attribute so callers that look the
        # function up via the module (e.g. mlx_audio imported later) get
        # the logging wrapper.
        fd._try_to_load_from_cache = _patched_try_to_load_from_cache

        print("[HF_PATCH] huggingface_hub patched for offline mode")

    except ImportError:
        print("[HF_PATCH] huggingface_hub not found, skipping patch")
    except Exception as e:
        # Never let a diagnostics patch break startup.
        print(f"[HF_PATCH] Error patching huggingface_hub: {e}")


def ensure_original_qwen_config_cached():
    """Alias the original Qwen repo cache dir to the MLX community cache.

    The MLX community model is converted from the original Qwen model, and
    mlx_audio may try to resolve config files against the original repo id.
    When only the MLX repo is cached locally, create a cache-directory
    symlink named after the original repo so those lookups succeed offline.

    Best-effort: all failure modes (huggingface_hub missing, symlinks
    unsupported on the filesystem, permission errors) are logged and
    swallowed so importing this module can never crash.
    """
    try:
        from huggingface_hub import constants as hf_constants
    except ImportError:
        # Mirror patch_huggingface_hub_offline(): without huggingface_hub
        # there is no cache to alias, so just log and bail out.
        print("[HF_PATCH] huggingface_hub not found, skipping symlink setup")
        return

    # Original Qwen model that mlx_audio might reference, and the MLX
    # conversion that Voicebox actually downloads.
    original_repo = "Qwen/Qwen3-TTS-12Hz-1.7B-Base"
    mlx_repo = "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16"

    cache_dir = Path(hf_constants.HF_HUB_CACHE)

    # HF hub cache layout: <cache>/models--{org}--{name}
    original_path = cache_dir / f"models--{original_repo.replace('/', '--')}"
    mlx_path = cache_dir / f"models--{mlx_repo.replace('/', '--')}"

    # Only act when the original repo is absent but the MLX one is present.
    if not original_path.exists() and mlx_path.exists():
        print(f"[HF_PATCH] Original repo not cached, but MLX version is")
        print(f"[HF_PATCH] Creating symlink from {original_repo} -> {mlx_repo}")

        try:
            # Create a symlink so cache lookups against the original repo id
            # resolve to the MLX repo's files.
            original_path.parent.mkdir(parents=True, exist_ok=True)
            original_path.symlink_to(mlx_path, target_is_directory=True)
            print(f"[HF_PATCH] Symlink created successfully")
        except Exception as e:
            # e.g. FileExistsError (raced), OSError (no symlink support).
            print(f"[HF_PATCH] Could not create symlink: {e}")


# Apply the patch automatically on import; opt out with VOICEBOX_OFFLINE_PATCH=0.
_patch_setting = os.environ.get("VOICEBOX_OFFLINE_PATCH", "1")
if _patch_setting != "0":
    patch_huggingface_hub_offline()
    ensure_original_qwen_config_cached()