From 12921272a709b625bfa8480a5353ad247d4de2f8 Mon Sep 17 00:00:00 2001 From: anvdn Date: Sun, 1 Mar 2026 15:16:03 -0500 Subject: [PATCH 1/2] disable deepgemm for blackwell GPU as it requires CUDA toolkit --- .../sglang/srt/layers/deep_gemm_wrapper/configurer.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/layers/deep_gemm_wrapper/configurer.py b/python/sglang/srt/layers/deep_gemm_wrapper/configurer.py index 9bb34046d51c..494b884261d5 100644 --- a/python/sglang/srt/layers/deep_gemm_wrapper/configurer.py +++ b/python/sglang/srt/layers/deep_gemm_wrapper/configurer.py @@ -18,8 +18,14 @@ def _compute_enable_deep_gemm(): return envs.SGLANG_ENABLE_JIT_DEEPGEMM.get() - -ENABLE_JIT_DEEPGEMM = _compute_enable_deep_gemm() +# DeepGEMM initialization needs a CUDA toolkit install (it asserts that a CUDA home is found) and was seen failing on a Blackwell GPU without one, so JIT DeepGEMM is hard-disabled here for all GPUs and _compute_enable_deep_gemm() is no longer consulted. +# File "/.pyenv/versions/3.12.7/lib/python3.12/site-packages/deep_gemm/__init__.py", line 42, in _ensure_initialized +# torch.ops.deep_gemm.init(library_root, _find_cuda_home()) +# ^^^^^^^^^^^^^^^^^ +# File "/.pyenv/versions/3.12.7/lib/python3.12/site-packages/deep_gemm/__init__.py", line 30, in _find_cuda_home +# assert cuda_home is not None +# ^^^^^^^^^^^^^^^^^^^^^ +ENABLE_JIT_DEEPGEMM = False DEEPGEMM_BLACKWELL = ENABLE_JIT_DEEPGEMM and is_blackwell() DEEPGEMM_SCALE_UE8M0 = DEEPGEMM_BLACKWELL From e359d79aa2e2b105d2748cd52a60b8bf13d7898d Mon Sep 17 00:00:00 2001 From: anvdn Date: Sun, 1 Mar 2026 15:17:04 -0500 Subject: [PATCH 2/2] Bump SGLang version to 0.5.5.post3+hs7 --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index fee1b6e7ae8b..5acd2704bc26 100755 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta" [project] name = "sglang" -version = "0.5.5.post3+hs5" +version = "0.5.5.post3+hs7" description = "SGLang is a fast serving framework for large language models and vision
language models." readme = "README.md" requires-python = ">=3.10"