Skip to content

Commit ce6119c

Browse files
zhyncsxwu-intel
authored and committed
chore: upgrade sgl-kernel 0.1.1 (sgl-project#5933)
1 parent 5691b95 commit ce6119c

File tree

7 files changed

+20
-14
lines changed

7 files changed

+20
-14
lines changed

.github/workflows/vllm-dependency-test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
- name: Install dependencies
3131
run: |
3232
bash scripts/ci_install_dependency.sh
33-
pip install "vllm>=0.6.4.post1,<=0.7.2"
33+
pip install "vllm==0.8.4"
3434
pip install "bitsandbytes>=0.44.0"
3535
3636
- name: Run VLLM dependency tests

python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ runtime_common = [
4747

4848
srt = [
4949
"sglang[runtime_common]",
50-
"sgl-kernel==0.1.0",
50+
"sgl-kernel==0.1.1",
5151
"flashinfer_python==0.2.5",
5252
"torch==2.6.0",
5353
"torchvision==0.21.0",

python/sglang/srt/entrypoints/engine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ def _set_envs_and_config(server_args: ServerArgs):
461461
if _is_cuda:
462462
assert_pkg_version(
463463
"sgl-kernel",
464-
"0.1.0",
464+
"0.1.1",
465465
"Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
466466
)
467467

python/sglang/srt/layers/quantization/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
109109
if quantization in VLLM_QUANTIZATION_METHODS and not VLLM_AVAILABLE:
110110
raise ValueError(
111111
f"{quantization} quantization requires some operators from vllm. "
112-
"Pleaes install vllm by `pip install vllm==0.7.2`"
112+
"Pleaes install vllm by `pip install vllm==0.8.4`"
113113
)
114114

115115
return QUANTIZATION_METHODS[quantization]
@@ -310,7 +310,7 @@ def new_apply(
310310
if correction_bias is not None:
311311
if not has_correction_bias:
312312
raise ValueError(
313-
"Please increase the version of your vllm. Try `pip install vllm==0.7.2`"
313+
"Please increase the version of your vllm. Try `pip install vllm==0.8.4`"
314314
)
315315
kwargs["e_score_correction_bias"] = correction_bias
316316
return original_apply(**kwargs)

python/sglang/srt/model_executor/model_runner.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
get_available_gpu_memory,
8080
get_bool_env_var,
8181
init_custom_process_group,
82+
is_ampere_with_cuda_12_3,
8283
is_cuda,
8384
is_fa3_default_architecture,
8485
is_flashinfer_available,
@@ -246,7 +247,7 @@ def model_specific_adjustment(self):
246247
if not self.use_mla_backend:
247248
# MHA architecture
248249
if (
249-
is_hopper_with_cuda_12_3()
250+
(is_ampere_with_cuda_12_3() or is_hopper_with_cuda_12_3())
250251
and is_no_spec_infer_or_topk_one(server_args)
251252
and is_fa3_default_architecture(self.model_config.hf_config)
252253
):
@@ -927,8 +928,10 @@ def init_attention_backend(self):
927928

928929
self.attn_backend = FlashMLABackend(self)
929930
elif self.server_args.attention_backend == "fa3":
930-
assert torch.cuda.get_device_capability()[0] >= 9, (
931-
"FlashAttention v3 Backend requires SM>=90. "
931+
assert (
932+
torch.cuda.get_device_capability()[0] == 8 and not self.use_mla_backend
933+
) or torch.cuda.get_device_capability()[0] == 9, (
934+
"FlashAttention v3 Backend requires SM>=80 and SM<=90. "
932935
"Please use `--attention-backend flashinfer`."
933936
)
934937
from sglang.srt.layers.attention.flashattention_backend import (

python/sglang/srt/utils.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1905,13 +1905,16 @@ def fast_topk(values, topk, dim):
19051905
return torch.topk(values, topk, dim=dim)
19061906

19071907

1908-
def is_hopper_with_cuda_12_3():
1908+
def _check(cc_major):
19091909
if not is_cuda():
19101910
return False
1911-
is_hopper = torch.cuda.get_device_capability()[0] == 9
1912-
cuda_version = torch.version.cuda.split(".")
1913-
is_cuda_compatible = int(cuda_version[0]) == 12 and int(cuda_version[1]) >= 3
1914-
return is_hopper and is_cuda_compatible
1911+
return torch.cuda.get_device_capability()[0] == cc_major and tuple(
1912+
map(int, torch.version.cuda.split(".")[:2])
1913+
) >= (12, 3)
1914+
1915+
1916+
is_ampere_with_cuda_12_3 = lambda: _check(8)
1917+
is_hopper_with_cuda_12_3 = lambda: _check(9)
19151918

19161919

19171920
def get_free_port():

scripts/ci_install_dependency.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ rm -rf /usr/local/lib/python3.10/dist-packages/sgl_kernel*
1616
pip install --upgrade pip
1717

1818
# Install sgl-kernel
19-
pip install sgl-kernel==0.1.0 --no-cache-dir
19+
pip install sgl-kernel==0.1.1 --no-cache-dir
2020

2121
# Install the main package
2222
pip install -e "python[all]"

0 commit comments

Comments
 (0)