Merged
Changes from 4 commits
2 changes: 1 addition & 1 deletion .github/workflows/pr-test-sgl-kernel.yml
@@ -88,7 +88,7 @@ jobs:
- name: Install
run: |
bash scripts/ci_install_dependency.sh
-pip3 install torch==2.5.1 && pip3 install pytest
+pip3 install torch==2.6.0 && pip3 install pytest
pip3 uninstall sgl-kernel -y || true
pip3 install sgl-kernel/dist/*whl --force-reinstall --no-deps
pip3 list | grep sgl-kernel
2 changes: 1 addition & 1 deletion benchmark/deepseek_v3/README.md
@@ -33,7 +33,7 @@ Add [performance optimization options](#performance-optimization-options) as needed.

```bash
# Installation
-pip install "sglang[all]>=0.4.3" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python
+pip install "sglang[all]>=0.4.5.post1"

# Launch
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code
2 changes: 1 addition & 1 deletion docker/Dockerfile
@@ -43,6 +43,6 @@ RUN python3 -m pip install --upgrade pip setuptools wheel html5lib six \
fi \
&& python3 -m pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cu${CUINDEX} \
&& cd sglang \
-&& python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --find-links https://flashinfer.ai/whl/cu${CUINDEX}/torch2.5/flashinfer-python
+&& python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --find-links https://flashinfer.ai/whl/cu${CUINDEX}/torch2.6/flashinfer-python

ENV DEBIAN_FRONTEND=interactive
2 changes: 1 addition & 1 deletion docs/start/install.md
@@ -164,4 +164,4 @@ sky status --endpoint 30000 sglang
- [FlashInfer](https://github.com/flashinfer-ai/flashinfer) is the default attention kernel backend. It only supports sm75 and above. If you encounter any FlashInfer-related issues on sm75+ devices (e.g., T4, A10, A100, L4, L40S, H100), please switch to other kernels by adding `--attention-backend triton --sampling-backend pytorch` and open an issue on GitHub.
- If you only need to use OpenAI models with the frontend language, you can avoid installing other dependencies by using `pip install "sglang[openai]"`.
- The language frontend operates independently of the backend runtime. You can install the frontend locally without needing a GPU, while the backend can be set up on a GPU-enabled machine. To install the frontend, run `pip install sglang`, and for the backend, use `pip install sglang[srt]`. `srt` is the abbreviation of SGLang runtime.
-- To reinstall flashinfer locally, use the following command: `pip install "flashinfer-python>=0.2.3" -i https://flashinfer.ai/whl/cu124/torch2.5 --force-reinstall --no-deps` and then delete the cache with `rm -rf ~/.cache/flashinfer`.
+- To reinstall flashinfer locally, use the following command: `pip install "flashinfer-python==0.2.3" -i https://flashinfer.ai/whl/cu124/torch2.6 --force-reinstall --no-deps` and then delete the cache with `rm -rf ~/.cache/flashinfer`.
4 changes: 2 additions & 2 deletions python/pyproject.toml
@@ -49,8 +49,8 @@ srt = [
"sglang[runtime_common]",
"sgl-kernel==0.0.9.post2",
"flashinfer_python==0.2.3",
-"torch==2.5.1",
-"torchvision==0.20.1",
+"torch==2.6.0",
+"torchvision==0.21.0",
"cuda-python",
"outlines>=0.0.44,<=0.1.11",
"partial_json_parser",
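The pyproject change above tightens the `srt` extra to torch 2.6.0 and torchvision 0.21.0. A quick way to confirm that an environment actually matches such pins is `importlib.metadata` — a minimal sketch (`check_pins` is a hypothetical helper, not part of the repo):

```python
from importlib.metadata import version, PackageNotFoundError

def check_pins(pins):
    """Map each pinned package to (installed_version_or_None, matches_pin)."""
    result = {}
    for pkg, want in pins.items():
        try:
            have = version(pkg)
        except PackageNotFoundError:
            have = None  # package not installed at all
        result[pkg] = (have, have == want)
    return result

# Pins taken from the srt extra in the diff above
print(check_pins({"torch": "2.6.0", "torchvision": "0.21.0"}))
```

This only checks exact `==` pins; range specifiers such as `outlines>=0.0.44,<=0.1.11` would need a real specifier parser instead of string equality.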
2 changes: 1 addition & 1 deletion python/sglang/srt/layers/dp_attention.py
@@ -143,7 +143,7 @@ def memcpy_triton_kernel(
src_ptr,
offset_ptr,
sz_ptr,
-offset_src,
+offset_src: tl.constexpr,
chunk_size, # multiplied for offset and sz
BLOCK_SIZE: tl.constexpr,
):
2 changes: 1 addition & 1 deletion sgl-kernel/build.sh
@@ -10,7 +10,7 @@ if [ ${CUDA_VERSION} = "12.8" ]; then
TORCH_INSTALL="pip install --no-cache-dir --pre torch --index-url https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION//.}"
else
DOCKER_IMAGE="pytorch/manylinux-builder:cuda${CUDA_VERSION}"
-TORCH_INSTALL="pip install --no-cache-dir torch==2.5.1 --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION//.}"
+TORCH_INSTALL="pip install --no-cache-dir torch==2.6.0 --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION//.}"
fi

docker run --rm \
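`build.sh` selects the PyTorch wheel index with the bash expansion `${CUDA_VERSION//.}`, which strips the dots from the CUDA version (e.g. `12.4` → `cu124`), and routes CUDA 12.8 to the nightly index. The same mapping, sketched in Python purely for illustration (`torch_index_url` is a hypothetical helper):

```python
def torch_index_url(cuda_version: str, nightly: bool = False) -> str:
    """Mirror build.sh's URL construction: '12.4' -> '.../whl/cu124'."""
    tag = "cu" + cuda_version.replace(".", "")  # same effect as ${CUDA_VERSION//.}
    base = "https://download.pytorch.org/whl"
    return f"{base}/nightly/{tag}" if nightly else f"{base}/{tag}"

print(torch_index_url("12.4"))                 # stable index for CUDA 12.4
print(torch_index_url("12.8", nightly=True))   # nightly index, as in the 12.8 branch
```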