[router] add tool parser base structure and partial json parser (#9482) #8
name: PR Test (PD Router)

on:
  push:
    branches: [ main ]
    paths:
      - 'python/sglang/srt/disaggregation/**'
      - 'scripts/ci/ci_start_disaggregation_servers.sh'
      - 'sgl-router/**'
  pull_request:
    branches: [ main ]
    paths:
      - 'python/sglang/srt/disaggregation/**'
      - 'scripts/ci/ci_start_disaggregation_servers.sh'
      - 'sgl-router/**'
  workflow_dispatch:

concurrency:
  group: test-disaggregation-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  pull-requests: write
  issues: write

jobs:
  test-disaggregation:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false
    runs-on: [h200]
    timeout-minutes: 45
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 10
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'
      - name: Setup Rust
        run: |
          bash scripts/ci/ci_install_rust.sh
      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            sgl-router/target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
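          # The key pins the cache to the exact Cargo.lock contents; when it
          # misses, restore-keys below falls back to the newest cargo cache
          # for this OS so incremental builds stay warm.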
          restore-keys: |
            ${{ runner.os }}-cargo-
      - name: Cache pip dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('python/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Validate environment
        run: |
          echo "=== System Validation ==="
          nvidia-smi
          echo "GPU count: $(nvidia-smi -L | wc -l)"
          if [ "$(nvidia-smi -L | wc -l)" -lt 8 ]; then
            echo "Error: This test requires at least 8 GPUs"
            exit 1
          fi
          echo "=== RDMA Validation ==="
          if ! command -v ibv_devices >/dev/null 2>&1; then
            echo "Error: InfiniBand tools not found"
            exit 1
          fi
          # Check for active IB devices
          found_active_device=false
          for device in mlx5_{0..11}; do
            if ibv_devinfo "$device" >/dev/null 2>&1; then
              state=$(ibv_devinfo "$device" | grep "state:" | head -1 | awk '{print $2}')
              if [[ "$state" == "PORT_ACTIVE" ]]; then
                echo "✓ Found active device: $device"
                found_active_device=true
                break
              fi
            fi
          done
          if [ "$found_active_device" = false ]; then
            echo "Error: No active IB devices found"
            echo "Available devices:"
            ibv_devices || true
            exit 1
          fi
          echo "=== Model Validation ==="
          if [ ! -d "/raid/models/meta-llama/Llama-3.1-8B-Instruct" ]; then
            echo "Error: Model not found"
            ls -la /raid/models/ || echo "No models directory"
            exit 1
          fi
          echo "✓ Model found"
      - name: Install SGLang dependencies
        run: |
          echo "Installing SGLang with all extras..."
          python3 -m pip --no-cache-dir install --upgrade pip
          python3 -m pip --no-cache-dir install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126
          python3 -m pip --no-cache-dir install -e "python[all]" --break-system-packages
          python3 -m pip --no-cache-dir install mooncake-transfer-engine==0.3.5
          python3 -m pip --no-cache-dir install --user --force-reinstall genai-bench==0.0.1
          python3 -m pip --no-cache-dir install sgl-kernel==0.3.5
      - name: Build and install sgl-router
        run: |
          source "$HOME/.cargo/env"
          echo "Building sgl-router..."
          cd sgl-router
          cargo build && python3 -m build && pip install --force-reinstall dist/*.whl
      - name: Start disaggregation servers
        id: start_servers
        run: |
          echo "Starting disaggregation servers..."
          bash scripts/ci/ci_start_disaggregation_servers.sh &
          SERVER_PID=$!
          echo "server_pid=$SERVER_PID" >> $GITHUB_OUTPUT
          # Wait for the startup script to finish its own health checks for all 8 servers
          wait_count=0
          while [ $wait_count -lt 30 ]; do
            if ps -p $SERVER_PID > /dev/null; then
              # Startup script still running; keep polling
              sleep 2
              wait_count=$((wait_count + 1))
            else
              # Script exited - check whether it succeeded
              wait $SERVER_PID
              exit_code=$?
              if [ $exit_code -eq 0 ]; then
                echo "✓ All disaggregation servers are healthy"
                break
              else
                echo "Error: Server startup failed with code $exit_code"
                exit 1
              fi
            fi
          done
          echo "✓ Servers started (PID: $SERVER_PID)"
      - name: Test all policies sequentially
        timeout-minutes: 30
        run: |
          POLICIES=("random" "round_robin" "cache_aware" "power_of_two")
          BASE_URL="http://127.0.0.9:8000"
          for policy in "${POLICIES[@]}"; do
            echo ""
            echo "=================================================="
            echo "Testing policy: $policy"
            echo "=================================================="
            # Start router with the current policy
            echo "Starting router with policy: $policy..."
            python3 -m sglang_router.launch_router \
              --pd-disaggregation \
              --policy "$policy" \
              --prefill http://127.0.0.1:30001 9001 \
              --prefill http://127.0.0.2:30002 9002 \
              --prefill http://127.0.0.3:30003 9003 \
              --prefill http://127.0.0.4:30004 9004 \
              --decode http://127.0.0.5:30005 \
              --decode http://127.0.0.6:30006 \
              --decode http://127.0.0.7:30007 \
              --decode http://127.0.0.8:30008 \
              --host 127.0.0.9 \
              --port 8000 &
            ROUTER_PID=$!
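            # Topology note: each --prefill flag takes the worker URL plus what
            # appears to be its bootstrap port (9001-9004); --decode workers take
            # only a URL. The router itself listens on 127.0.0.9:8000 ($BASE_URL).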
            # Wait for router to become healthy
            echo "Waiting for router to become healthy..."
            TIMEOUT=60
            ELAPSED=0
            while [ $ELAPSED -lt $TIMEOUT ]; do
              if curl --connect-timeout 5 --silent "$BASE_URL" > /dev/null 2>&1; then
                echo "✓ Router is reachable"
                break
              fi
              if ! ps -p $ROUTER_PID > /dev/null; then
                echo "Error: Router process died"
                exit 1
              fi
              sleep 5
              ELAPSED=$((ELAPSED + 5))
            done
            if [ $ELAPSED -ge $TIMEOUT ]; then
              echo "Error: Router health check timeout"
              kill $ROUTER_PID 2>/dev/null || true
              exit 1
            fi
            # Test API functionality
            echo "Testing API completions for $policy..."
            response=$(curl -s -X POST "$BASE_URL/v1/chat/completions" \
              -H "Content-Type: application/json" \
              -H "Authorization: Bearer test-token" \
              -d '{
                "model": "/raid/models/meta-llama/Llama-3.1-8B-Instruct",
                "messages": [
                  {"role": "user", "content": "Write a Python function to calculate fibonacci numbers recursively"}
                ],
                "stream": false,
                "max_tokens": 100
              }')
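            # A successful OpenAI-style response should look roughly like:
            #   {"choices": [{"message": {"role": "assistant", "content": "..."}}], ...}
            # The jq check below only asserts that .choices[0].message.content exists.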
if echo "$response" | jq -e '.choices[0].message.content' > /dev/null 2>&1; then | |
echo "✓ API test passed for $policy" | |
else | |
echo "✗ API test failed for $policy: $response" | |
kill $ROUTER_PID 2>/dev/null || true | |
exit 1 | |
fi | |
# Test streaming | |
echo "Testing streaming API for $policy..." | |
stream_response=$(timeout 30 curl -s -X POST "$BASE_URL/v1/chat/completions" \ | |
-H "Content-Type: application/json" \ | |
-H "Authorization: Bearer test-token" \ | |
-d '{ | |
"model": "/raid/models/meta-llama/Llama-3.1-8B-Instruct", | |
"messages": [ | |
{"role": "user", "content": "Count from 1 to 5"} | |
], | |
"stream": true, | |
"max_tokens": 50 | |
}') | |
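            # Streamed replies arrive as Server-Sent Events, one chunk per line, e.g.:
            #   data: {"choices":[{"delta":{"content":"1"}}], ...}
            #   data: [DONE]
            # so grepping for "data:" is enough to confirm streaming works end to end.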
if echo "$stream_response" | grep -q "data:"; then | |
echo "✓ Streaming API test passed for $policy" | |
else | |
echo "✗ Streaming API test failed for $policy" | |
kill $ROUTER_PID 2>/dev/null || true | |
exit 1 | |
fi | |
# Run genai-bench benchmark | |
echo "Running genai-bench for $policy..." | |
genai-bench benchmark \ | |
--api-backend openai \ | |
--api-base "http://127.0.0.9:8000" \ | |
--api-key "dummy-token" \ | |
--api-model-name "/raid/models/meta-llama/Llama-3.1-8B-Instruct" \ | |
--model-tokenizer /raid/models/meta-llama/Llama-3.1-8B-Instruct \ | |
--task text-to-text \ | |
--num-concurrency 64 \ | |
--traffic-scenario "D(8000,2000)" \ | |
--max-requests-per-run 640 \ | |
--max-time-per-run 2 \ | |
--experiment-folder-name "benchmark_${policy}" \ | |
--experiment-base-dir "." | |
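            # Scenario note: "D(8000,2000)" is assumed to be genai-bench's
            # deterministic traffic scenario (8000 input / 2000 output tokens per
            # request); consult the genai-bench docs before tuning these numbers.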
            # Find the actual experiment folder
            actual_folder=$(find . -maxdepth 1 -name "benchmark_${policy}" -type d | head -1)
            if [ -n "$actual_folder" ]; then
              echo "Genai-bench results saved in: $actual_folder"
              # Extract mean values and validate performance thresholds
              echo "📊 Extracting performance metrics for $policy..."
              # Find JSON files excluding experiment metadata
              json_files=$(find "$actual_folder" -name "*.json" | grep -v experiment_metadata)
              if [ -n "$json_files" ]; then
                # Extract metrics using jq and validate against loose thresholds
                for json_file in $json_files; do
                  echo "Processing: $(basename "$json_file")"
                  # Extract mean values for performance validation
                  ttft_mean=$(jq -r '.aggregated_metrics.stats.ttft.mean' "$json_file")
                  e2e_latency_mean=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean' "$json_file")
                  input_throughput_mean=$(jq -r '.aggregated_metrics.stats.input_throughput.mean' "$json_file")
                  output_throughput_mean=$(jq -r '.aggregated_metrics.stats.output_throughput.mean' "$json_file")
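                  # The jq paths above assume a results JSON shaped roughly like:
                  #   {"aggregated_metrics": {"stats": {
                  #     "ttft": {"mean": ...}, "e2e_latency": {"mean": ...},
                  #     "input_throughput": {"mean": ...}, "output_throughput": {"mean": ...}}}}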
echo " TTFT mean: ${ttft_mean}s" | |
echo " E2E Latency mean: ${e2e_latency_mean}s" | |
echo " Input Throughput mean: ${input_throughput_mean} tokens/s" | |
echo " Output Throughput mean: ${output_throughput_mean} tokens/s" | |
# Set mean thresholds (allowing for reasonable variance) | |
# These can be adjusted based on your performance requirements | |
ttft_threshold=2.0 # Max 2.0 seconds for mean TTFT | |
e2e_latency_threshold=24.0 # Max 8.0 seconds for mean E2E latency | |
input_throughput_threshold=10000 # Min 9000 tokens/s for mean input throughput | |
output_throughput_threshold=90 # Min 100 tokens/s for mean output throughput | |
# Validate mean thresholds | |
validation_passed=true | |
if (( $(echo "$ttft_mean > $ttft_threshold" | bc -l) )); then | |
echo "❌ TTFT validation failed: $ttft_mean > $ttft_threshold" | |
validation_passed=false | |
fi | |
if (( $(echo "$e2e_latency_mean > $e2e_latency_threshold" | bc -l) )); then | |
echo "❌ E2E Latency validation failed: $e2e_latency_mean > $e2e_latency_threshold" | |
validation_passed=false | |
fi | |
if (( $(echo "$input_throughput_mean < $input_throughput_threshold" | bc -l) )); then | |
echo "❌ Input Throughput validation failed: $input_throughput_mean < $input_throughput_threshold" | |
validation_passed=false | |
fi | |
if (( $(echo "$output_throughput_mean < $output_throughput_threshold" | bc -l) )); then | |
echo "❌ Output Throughput validation failed: $output_throughput_mean < $output_throughput_threshold" | |
validation_passed=false | |
fi | |
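                  # bc -l prints 1 when a comparison holds and 0 otherwise, so the
                  # arithmetic test (( ... )) succeeds exactly when a threshold is violated.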
if [ "$validation_passed" = true ]; then | |
echo "✅ Performance validation passed for $policy" | |
else | |
echo "❌ Performance validation failed for $policy" | |
kill $ROUTER_PID 2>/dev/null || true | |
exit 1 | |
fi | |
done | |
echo "✓ Genai-bench completed successfully for $policy" | |
echo "📊 Detailed metrics and plots available in: $actual_folder" | |
else | |
echo "✗ Benchmark failed for $policy: No JSON results found" | |
kill $ROUTER_PID 2>/dev/null || true | |
exit 1 | |
fi | |
else | |
echo "✗ Benchmark failed for $policy: Experiment folder not found" | |
kill $ROUTER_PID 2>/dev/null || true | |
exit 1 | |
fi | |
# Stop router before testing next policy | |
echo "Stopping router for $policy..." | |
# First try graceful shutdown | |
kill $ROUTER_PID 2>/dev/null || true | |
# Wait up to 5 seconds for graceful shutdown | |
for i in {1..5}; do | |
if ! ps -p $ROUTER_PID > /dev/null 2>&1; then | |
echo "Router stopped gracefully" | |
break | |
fi | |
sleep 1 | |
done | |
# Force kill if still running | |
if ps -p $ROUTER_PID > /dev/null 2>&1; then | |
echo "Force killing router..." | |
kill -9 $ROUTER_PID 2>/dev/null || true | |
fi | |
# Short delay to ensure port is released | |
sleep 2 | |
echo "✓ Completed testing for $policy" | |
done | |
echo "" | |
echo "✅ All policies tested successfully!" | |
      - name: Upload benchmark results
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: genai-bench-results-all-policies
          path: benchmark_**/
      - name: Cleanup servers
        if: always()
        run: |
          if [ -n "${{ steps.start_servers.outputs.server_pid }}" ]; then
            pkill -P ${{ steps.start_servers.outputs.server_pid }} || true
            kill ${{ steps.start_servers.outputs.server_pid }} || true
          fi
          pkill -f "sglang.launch_server" || true
          sleep 5
          # Exclude the grep process itself so an empty result really counts as 0
          remaining=$(ps aux | grep "sglang.launch_server" | grep -v grep | wc -l)
          echo "Cleanup completed. Remaining processes: $remaining"
  summarize-benchmarks:
    needs: test-disaggregation
    runs-on: ubuntu-latest
    if: success()
    steps:
      - name: Install jq
        run: sudo apt-get update && sudo apt-get install -y jq bc
      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: genai-bench-results-all-policies
      - name: List downloaded contents
        run: |
          echo "Contents after download:"
          ls -la
          find . -name "benchmark_*" -type d
          echo "JSON files found:"
          find . -name "*.json" | head -10
      - name: Create benchmark summary
        run: |
          echo "=== DEBUG: Creating benchmark summary ==="
          echo "Available benchmark directories:"
          find . -name "benchmark_*" -type d
          echo "=========================================="
          echo "## PD Router Genai-Bench Results Summary" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "🚀 **Benchmarked with genai-bench for comprehensive LLM serving performance evaluation**" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "| Policy | Status | TTFT (s) | E2E Latency (s) | Input Throughput (tok/s) | Output Throughput (tok/s) |" >> $GITHUB_STEP_SUMMARY
          echo "|--------|--------|----------|-----------------|--------------------------|---------------------------|" >> $GITHUB_STEP_SUMMARY
          # First, complete the table with all policies
          for policy in random round_robin cache_aware power_of_two; do
            # Find genai-bench result folders for this policy (handle zip extraction structure)
            result_folder=$(find . -maxdepth 2 -name "benchmark_${policy}" -type d | head -1)
            if [ -z "$result_folder" ]; then
              # Try alternative patterns in case of different extraction structure
              result_folder=$(find . -maxdepth 3 -path "*benchmark_${policy}*" -type d | head -1)
            fi
            echo "DEBUG: Policy ${policy} -> Found folder: ${result_folder:-'NOT FOUND'}"
            if [ -n "$result_folder" ] && [ -d "$result_folder" ]; then
              # Find JSON file with metrics
              json_file=$(find "$result_folder" -name "*.json" -not -name "experiment_metadata.json" | head -1)
              if [ -n "$json_file" ] && [ -f "$json_file" ]; then
                # Extract performance metrics
                ttft_mean=$(jq -r '.aggregated_metrics.stats.ttft.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")
                e2e_latency_mean=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")
                input_throughput_mean=$(jq -r '.aggregated_metrics.stats.input_throughput.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")
                output_throughput_mean=$(jq -r '.aggregated_metrics.stats.output_throughput.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")
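                # jq's // operator substitutes "N/A" when the path is missing or null,
                # and the || echo fallback covers jq failing outright (e.g. malformed JSON).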
                # Format numbers for display (2 decimal places)
                if [ "$ttft_mean" != "N/A" ] && [ "$ttft_mean" != "null" ]; then
                  ttft_display=$(printf "%.2f" "$ttft_mean" 2>/dev/null || echo "$ttft_mean")
                else
                  ttft_display="N/A"
                fi
                if [ "$e2e_latency_mean" != "N/A" ] && [ "$e2e_latency_mean" != "null" ]; then
                  e2e_display=$(printf "%.2f" "$e2e_latency_mean" 2>/dev/null || echo "$e2e_latency_mean")
                else
                  e2e_display="N/A"
                fi
                if [ "$input_throughput_mean" != "N/A" ] && [ "$input_throughput_mean" != "null" ]; then
                  input_display=$(printf "%.0f" "$input_throughput_mean" 2>/dev/null || echo "$input_throughput_mean")
                else
                  input_display="N/A"
                fi
                if [ "$output_throughput_mean" != "N/A" ] && [ "$output_throughput_mean" != "null" ]; then
                  output_display=$(printf "%.0f" "$output_throughput_mean" 2>/dev/null || echo "$output_throughput_mean")
                else
                  output_display="N/A"
                fi
                echo "| ${policy} | ✅ Success | $ttft_display | $e2e_display | $input_display | $output_display |" >> $GITHUB_STEP_SUMMARY
              else
                echo "| ${policy} | ❌ No Data | N/A | N/A | N/A | N/A |" >> $GITHUB_STEP_SUMMARY
              fi
            else
              echo "| ${policy} | ❌ Failed | N/A | N/A | N/A | N/A |" >> $GITHUB_STEP_SUMMARY
            fi
          done
          # Add performance validation summary
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "## 📊 Performance Validation" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
echo "**Thresholds:** TTFT ≤ 2.0s | E2E Latency ≤ 8.0s | Input Throughput ≥ 10,000 tok/s | Output Throughput ≥ 100 tok/s" >> $GITHUB_STEP_SUMMARY | |
echo "" >> $GITHUB_STEP_SUMMARY | |
validation_summary="" | |
for policy in random round_robin cache_aware power_of_two; do | |
# Use same robust path finding as above | |
result_folder=$(find . -maxdepth 2 -name "benchmark_${policy}" -type d | head -1) | |
if [ -z "$result_folder" ]; then | |
result_folder=$(find . -maxdepth 3 -path "*benchmark_${policy}*" -type d | head -1) | |
fi | |
if [ -n "$result_folder" ] && [ -d "$result_folder" ]; then | |
json_file=$(find "$result_folder" -name "*.json" -not -name "experiment_metadata.json" | head -1) | |
if [ -n "$json_file" ] && [ -f "$json_file" ]; then | |
# Extract metrics for validation | |
ttft=$(jq -r '.aggregated_metrics.stats.ttft.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A") | |
e2e_latency=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A") | |
input_throughput=$(jq -r '.aggregated_metrics.stats.input_throughput.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A") | |
output_throughput=$(jq -r '.aggregated_metrics.stats.output_throughput.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A") | |
# Check thresholds (using same values as in main workflow) | |
validation_status="✅" | |
if [ "$ttft" != "N/A" ] && [ "$ttft" != "null" ]; then | |
if (( $(echo "$ttft > 2.0" | bc -l 2>/dev/null || echo "0") )); then | |
validation_status="❌" | |
fi | |
fi | |
if [ "$e2e_latency" != "N/A" ] && [ "$e2e_latency" != "null" ]; then | |
if (( $(echo "$e2e_latency > 24.0" | bc -l 2>/dev/null || echo "0") )); then | |
validation_status="❌" | |
fi | |
fi | |
if [ "$input_throughput" != "N/A" ] && [ "$input_throughput" != "null" ]; then | |
if (( $(echo "$input_throughput < 10000" | bc -l 2>/dev/null || echo "0") )); then | |
validation_status="❌" | |
fi | |
fi | |
if [ "$output_throughput" != "N/A" ] && [ "$output_throughput" != "null" ]; then | |
if (( $(echo "$output_throughput < 90" | bc -l 2>/dev/null || echo "0") )); then | |
validation_status="❌" | |
fi | |
fi | |
validation_summary="${validation_summary}- **${policy}**: $validation_status\n" | |
else | |
validation_summary="${validation_summary}- **${policy}**: ❌ No data\n" | |
fi | |
else | |
validation_summary="${validation_summary}- **${policy}**: ❌ Failed\n" | |
fi | |
done | |
echo -e "$validation_summary" >> $GITHUB_STEP_SUMMARY | |
echo "" >> $GITHUB_STEP_SUMMARY | |
echo "## 📊 Genai-Bench Features Used" >> $GITHUB_STEP_SUMMARY | |
echo "- **Token-level Performance**: TTFT, TPOT, End-to-End latency" >> $GITHUB_STEP_SUMMARY | |
echo "- **Throughput Analysis**: Input/Output/Total token throughput" >> $GITHUB_STEP_SUMMARY | |
echo "- **Statistical Analysis**: Percentiles, mean, std dev for all metrics" >> $GITHUB_STEP_SUMMARY | |
echo "- **Visual Reports**: Automated plots and Excel summaries" >> $GITHUB_STEP_SUMMARY | |
echo "- **SGLang Backend**: Native integration with SGLang serving" >> $GITHUB_STEP_SUMMARY | |
echo "" >> $GITHUB_STEP_SUMMARY | |
echo "✅ All policies tested successfully with genai-bench!" >> $GITHUB_STEP_SUMMARY |