feat: Add flexible validation for partial weight updates #29418
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: PR Test | |
on: | |
push: | |
branches: [ main ] | |
pull_request: | |
branches: [ main ] | |
workflow_dispatch: | |
inputs: | |
version: | |
description: "FlashInfer version" | |
required: true | |
type: choice | |
default: 'release' | |
options: | |
- 'release' | |
- 'nightly' | |
concurrency: | |
group: pr-test-${{ github.ref }} | |
cancel-in-progress: true | |
jobs: | |
check-changes: | |
runs-on: ubuntu-latest | |
outputs: | |
src: ${{ steps.filter.outputs.src }} | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Detect file changes | |
id: filter | |
uses: dorny/paths-filter@v3 | |
with: | |
filters: | | |
src: | |
- "python/**" | |
- "scripts/**" | |
- "test/**" | |
- ".github/workflows/pr-test.yml" | |
unit-test-frontend: | |
needs: check-changes | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 1-gpu-runner | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_dependency.sh | |
- name: Run test | |
timeout-minutes: 10 | |
run: | | |
cd test/lang | |
python3 run_suite.py --suite per-commit | |
unit-test-backend-1-gpu: | |
needs: [check-changes, unit-test-frontend] | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 1-gpu-runner | |
strategy: | |
fail-fast: false | |
matrix: | |
part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_dependency.sh | |
- name: Run test | |
timeout-minutes: 30 | |
run: | | |
cd test/srt | |
python3 run_suite.py --suite per-commit --auto-partition-id ${{ matrix.part }} --auto-partition-size 10 | |
unit-test-backend-2-gpu: | |
needs: [check-changes] | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 2-gpu-runner | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_dependency.sh | |
- name: Run test | |
timeout-minutes: 30 | |
run: | | |
cd test/srt | |
python3 run_suite.py --suite per-commit-2-gpu | |
unit-test-backend-4-gpu: | |
needs: [check-changes, unit-test-backend-2-gpu] | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 4-gpu-runner | |
strategy: | |
fail-fast: false | |
matrix: | |
part: [0, 1] | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_dependency.sh | |
- name: Run test | |
timeout-minutes: 20 | |
run: | | |
cd test/srt | |
python3 run_suite.py --suite per-commit-4-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 | |
unit-test-backend-8-gpu: | |
needs: [check-changes, unit-test-backend-2-gpu] | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 8-gpu-runner | |
strategy: | |
fail-fast: false | |
matrix: | |
part: [0, 1] | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_dependency.sh | |
- name: Run test | |
timeout-minutes: 20 | |
run: | | |
cd test/srt | |
python3 run_suite.py --suite per-commit-8-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 | |
performance-test-1-gpu-part-1: | |
needs: check-changes | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 1-gpu-runner | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_dependency.sh | |
- name: Benchmark single latency | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_small | |
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_default | |
- name: Benchmark online latency | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default | |
- name: Benchmark offline throughput | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default | |
- name: Benchmark offline throughput (Non-streaming, small batch size) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size | |
- name: Benchmark online latency (EAGLE) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_eagle | |
- name: Benchmark online latency (LoRA) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_lora_online_latency | |
python3 -m unittest test_bench_serving.TestBenchServing.test_lora_online_latency_with_concurrent_adapter_updates | |
performance-test-1-gpu-part-2: | |
needs: check-changes | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 1-gpu-runner | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_dependency.sh | |
- name: Benchmark offline throughput (w/o RadixAttention) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache | |
- name: Benchmark offline throughput (w/ Triton) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend | |
- name: Benchmark offline throughput (w/ FP8) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8 | |
- name: Benchmark VLM offline throughput | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_offline_throughput | |
- name: Benchmark VLM online latency | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_online_latency | |
performance-test-2-gpu: | |
needs: [check-changes, unit-test-backend-2-gpu] | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 2-gpu-runner | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_dependency.sh | |
- name: Benchmark single latency (TP=2) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1 | |
- name: Benchmark single latency + torch.compile (TP=2) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1 | |
- name: Benchmark offline throughput (TP=2) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default | |
- name: Benchmark offline throughput (w/o RadixAttention) (TP=2) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache | |
- name: Benchmark offline PP decode throughput (PP=2) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_pp_offline_throughput_default_decode | |
- name: Benchmark offline PP prefill throughput (PP=2) | |
timeout-minutes: 10 | |
run: | | |
cd test/srt | |
python3 -m unittest test_bench_serving.TestBenchServing.test_pp_long_context_prefill | |
accuracy-test-1-gpu: | |
needs: check-changes | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 1-gpu-runner | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_dependency.sh | |
git clone https://github.com/merrymercy/human-eval.git | |
cd human-eval | |
pip install -e . | |
- name: Evaluate accuracy | |
timeout-minutes: 20 | |
run: | | |
cd test/srt | |
python3 test_eval_accuracy_large.py | |
accuracy-test-2-gpu: | |
needs: [check-changes, accuracy-test-1-gpu] | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 2-gpu-runner | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_dependency.sh | |
git clone https://github.com/merrymercy/human-eval.git | |
cd human-eval | |
pip install -e . | |
- name: Evaluate accuracy (TP=2) | |
timeout-minutes: 20 | |
run: | | |
cd test/srt | |
python3 test_moe_eval_accuracy_large.py | |
unit-test-deepep-4-gpu: | |
needs: [check-changes, unit-test-backend-2-gpu] | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 4-gpu-runner | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_deepep.sh | |
- name: Run test | |
timeout-minutes: 20 | |
run: | | |
cd test/srt | |
python3 run_suite.py --suite per-commit-4-gpu-deepep | |
unit-test-deepep-8-gpu: | |
needs: [check-changes, unit-test-backend-2-gpu] | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: 8-gpu-runner | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
bash scripts/ci/ci_install_deepep.sh | |
- name: Run test | |
timeout-minutes: 20 | |
run: | | |
cd test/srt | |
python3 run_suite.py --suite per-commit-8-gpu-deepep | |
unit-test-backend-8-gpu-b200: | |
needs: [check-changes, unit-test-backend-2-gpu] | |
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && | |
github.event.pull_request.draft == false && | |
needs.check-changes.outputs.src == 'true' | |
runs-on: b200-runner | |
strategy: | |
fail-fast: false | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh | |
- name: Run test | |
timeout-minutes: 20 | |
run: | | |
cd test/srt | |
python3 run_suite.py --suite per-commit-8-gpu-b200 --auto-partition-id 0 --auto-partition-size 1 | |
pr-test-finish: | |
needs: [ | |
check-changes, | |
unit-test-frontend, unit-test-backend-1-gpu, | |
unit-test-backend-2-gpu, unit-test-backend-4-gpu, unit-test-backend-8-gpu, | |
performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu, | |
accuracy-test-1-gpu, accuracy-test-2-gpu, | |
unit-test-deepep-4-gpu, unit-test-deepep-8-gpu, | |
unit-test-backend-8-gpu-b200, | |
] | |
if: needs.check-changes.outputs.src == 'true' | |
runs-on: ubuntu-latest | |
steps: | |
- name: Check all dependent job statuses | |
run: | | |
results=(${{ join(needs.*.result, ' ') }}) | |
for result in "${results[@]}"; do | |
if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then | |
echo "Job failed with result: $result" | |
exit 1 | |
fi | |
done | |
echo "All jobs completed successfully" | |
exit 0 |