From 41617c0d74344b3bcc3b4f1c33a11c43cc3ab91c Mon Sep 17 00:00:00 2001
From: Ric
Date: Sat, 10 Jan 2026 14:27:07 -0800
Subject: [PATCH] fix: make bitsandbytes optional for CPU-only systems

- Make bitsandbytes import conditional with graceful fallback
- Exclude bitsandbytes dependency on Intel Macs where it can't work
- Provide clear error message when quantization requested without bitsandbytes
- Add documentation for CPU-only usage (device_map = "cpu")

This allows Heretic to run on systems without CUDA support by using
device_map = "cpu" and quantization = "none".

Closes #12
---
 config.default.toml  | 2 ++
 pyproject.toml       | 3 ++-
 src/heretic/model.py | 16 +++++++++++++++-
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/config.default.toml b/config.default.toml
index 8a5efce..e284d2c 100644
--- a/config.default.toml
+++ b/config.default.toml
@@ -16,10 +16,12 @@ dtypes = [
 ]
 
 # Device map to pass to Accelerate when loading the model.
+# Use "cpu" for CPU-only systems without GPU support.
 device_map = "auto"
 
 # Quantization method to use when loading the model.
 # Options: "none" (no quantization), "bnb_4bit" (4-bit quantization using bitsandbytes).
+# Note: 4-bit quantization requires bitsandbytes, which needs CUDA support.
 quantization = "none"
 
 # Memory limits to impose. 0 is usually your first graphics card.
diff --git a/pyproject.toml b/pyproject.toml
index 2a43f37..e41ff11 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,8 @@ classifiers = [
 ]
 dependencies = [
     "accelerate>=1.10.0",
-    "bitsandbytes>=0.45.0",
+    # bitsandbytes requires CUDA or Apple Silicon. Exclude Intel Macs where it won't work.
+    "bitsandbytes>=0.45.0; (platform_system != 'Darwin') or (platform_machine == 'arm64')",
     "datasets>=4.0.0",
     "hf-transfer>=0.1.9",
     "huggingface-hub>=0.34.4",
diff --git a/src/heretic/model.py b/src/heretic/model.py
index 9f15597..c6c2020 100644
--- a/src/heretic/model.py
+++ b/src/heretic/model.py
@@ -6,8 +6,16 @@
 from dataclasses import dataclass
 from typing import Any, cast
 
-import bitsandbytes as bnb
 import torch
+
+# bitsandbytes is optional - only available on systems with CUDA support.
+try:
+    import bitsandbytes as bnb
+
+    HAS_BITSANDBYTES = True
+except ImportError:
+    bnb = None  # type: ignore[assignment]
+    HAS_BITSANDBYTES = False
 import torch.nn.functional as F
 from peft import LoraConfig, PeftModel, get_peft_model
 from peft.tuners.lora.layer import Linear
@@ -181,6 +189,12 @@ def _get_quantization_config(self, dtype: str) -> BitsAndBytesConfig | None:
             BitsAndBytesConfig or None
         """
         if self.settings.quantization == QuantizationMethod.BNB_4BIT:
+            if not HAS_BITSANDBYTES:
+                raise RuntimeError(
+                    "4-bit quantization requires bitsandbytes, which is not available. "
+                    "Install it with 'pip install bitsandbytes' (requires CUDA) "
+                    "or set quantization = 'none' in your config."
+                )
             # BitsAndBytesConfig expects a torch.dtype, not a string.
             if dtype == "auto":
                 compute_dtype = torch.bfloat16
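
---

Usage note: with this patch applied, a CPU-only run should only need the two
settings named in the commit message. The snippet below is a minimal sketch of
a user config, assuming the same top-level keys as config.default.toml above;
all other keys are left at their defaults.

    # Run entirely on CPU: no GPU, no CUDA, no bitsandbytes needed.
    # "cpu" tells Accelerate to place all model weights in system RAM.
    device_map = "cpu"

    # Skip 4-bit quantization so the optional bitsandbytes import is never used
    # and the RuntimeError guard in _get_quantization_config() never triggers.
    quantization = "none"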