From 41617c0d74344b3bcc3b4f1c33a11c43cc3ab91c Mon Sep 17 00:00:00 2001
From: Ric
Date: Sat, 10 Jan 2026 14:27:07 -0800
Subject: [PATCH] fix: make bitsandbytes optional for CPU-only systems

- Make bitsandbytes import conditional with graceful fallback
- Exclude bitsandbytes dependency on Intel Macs where it can't work
- Provide clear error message when quantization requested without bitsandbytes
- Add documentation for CPU-only usage (device_map = "cpu")

This allows Heretic to run on systems without CUDA support by using
device_map = "cpu" and quantization = "none".

Closes #12
---
 config.default.toml  | 2 ++
 pyproject.toml       | 3 ++-
 src/heretic/model.py | 16 +++++++++++++++-
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/config.default.toml b/config.default.toml
index 8a5efce..e284d2c 100644
--- a/config.default.toml
+++ b/config.default.toml
@@ -16,10 +16,12 @@ dtypes = [
 ]
 
 # Device map to pass to Accelerate when loading the model.
+# Use "cpu" for CPU-only systems without GPU support.
 device_map = "auto"
 
 # Quantization method to use when loading the model.
 # Options: "none" (no quantization), "bnb_4bit" (4-bit quantization using bitsandbytes).
+# Note: 4-bit quantization requires bitsandbytes, which needs CUDA support.
 quantization = "none"
 
 # Memory limits to impose. 0 is usually your first graphics card.
diff --git a/pyproject.toml b/pyproject.toml
index 2a43f37..e41ff11 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,8 @@ classifiers = [
 ]
 dependencies = [
     "accelerate>=1.10.0",
-    "bitsandbytes>=0.45.0",
+    # bitsandbytes requires CUDA or Apple Silicon. Exclude Intel Macs where it won't work.
+    "bitsandbytes>=0.45.0; (platform_system != 'Darwin') or (platform_machine == 'arm64')",
     "datasets>=4.0.0",
     "hf-transfer>=0.1.9",
     "huggingface-hub>=0.34.4",
diff --git a/src/heretic/model.py b/src/heretic/model.py
index 9f15597..c6c2020 100644
--- a/src/heretic/model.py
+++ b/src/heretic/model.py
@@ -6,8 +6,16 @@
 from dataclasses import dataclass
 from typing import Any, cast
 
-import bitsandbytes as bnb
 import torch
+
+# bitsandbytes is optional - only available on systems with CUDA support.
+try:
+    import bitsandbytes as bnb
+
+    HAS_BITSANDBYTES = True
+except ImportError:
+    bnb = None  # type: ignore[assignment]
+    HAS_BITSANDBYTES = False
 import torch.nn.functional as F
 from peft import LoraConfig, PeftModel, get_peft_model
 from peft.tuners.lora.layer import Linear
@@ -181,6 +189,12 @@ def _get_quantization_config(self, dtype: str) -> BitsAndBytesConfig | None:
             BitsAndBytesConfig or None
         """
         if self.settings.quantization == QuantizationMethod.BNB_4BIT:
+            if not HAS_BITSANDBYTES:
+                raise RuntimeError(
+                    "4-bit quantization requires bitsandbytes, which is not available. "
+                    "Install it with 'pip install bitsandbytes' (requires CUDA) "
+                    "or set quantization = 'none' in your config."
+                )
             # BitsAndBytesConfig expects a torch.dtype, not a string.
             if dtype == "auto":
                 compute_dtype = torch.bfloat16
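
---

Usage note: with this patch applied, a CPU-only run should only need the two
settings named in the commit message. The snippet below is a minimal sketch of
a user config, assuming the same top-level keys as config.default.toml above;
all other keys are left at their defaults.

    # Run entirely on CPU: no GPU, no CUDA, no bitsandbytes needed.
    # "cpu" tells Accelerate to place all model weights in system RAM.
    device_map = "cpu"

    # Skip 4-bit quantization so the optional bitsandbytes import is never used
    # and the RuntimeError guard in _get_quantization_config() never triggers.
    quantization = "none"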