【TEST】add some func test ci #3412

Open
wants to merge 5 commits into develop

38 changes: 38 additions & 0 deletions test/ci_use/QwQ-32B/run_server.sh
@@ -0,0 +1,38 @@
#!/bin/bash
MODEL_PATH=${1}

if [ -z "$MODEL_PATH" ]; then
    echo "❌ Usage: $0 <model_path>"
    exit 1
fi

if [ ! -d "$MODEL_PATH" ]; then
    echo "❌ Error: model directory does not exist: $MODEL_PATH"
    exit 1
fi

echo "📁 Using model: $MODEL_PATH"


# Clean up logs
rm -rf log/*
mkdir -p log

# Environment variables
export CUDA_VISIBLE_DEVICES=0,1
export INFERENCE_MSG_QUEUE_ID=7679
export ENABLE_V1_KVCACHE_SCHEDULER=1


python -m fastdeploy.entrypoints.openai.api_server \
    --tensor-parallel-size 2 \
    --port 8787 \
    --engine-worker-queue-port 7679 \
    --metrics-port 7877 \
    --quantization wint8 \
    --max-model-len 32768 \
    --max-num-seqs 256 \
    --gpu-memory-utilization 0.9 \
    --model "$MODEL_PATH" \
    --load-strategy ipc_snapshot \
    --dynamic-load-weight
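
Note: run_server.sh launches the API server in the foreground and does not signal readiness, so whatever CI job wires these scripts together needs its own wait step before starting run_test.sh. A minimal sketch of such a wrapper is shown below; the model path is a placeholder, and the /health probe is an assumption (any lightweight GET endpoint on port 8787 could be polled the same way).

    #!/bin/bash
    # Hypothetical CI wrapper -- illustrative only, not part of this PR.
    MODEL_PATH=/path/to/QwQ-32B   # placeholder path

    bash run_server.sh "$MODEL_PATH" > server.log 2>&1 &
    SERVER_PID=$!

    # Wait for the server to come up (assumes a /health endpoint on port 8787).
    for _ in $(seq 1 120); do
        code=$(curl -s -o /dev/null -w "%{http_code}" "http://0.0.0.0:8787/health")
        [ "$code" = "200" ] && break
        sleep 5
    done

    bash run_test.sh
    status=$?
    kill "$SERVER_PID"
    exit "$status"
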
155 changes: 155 additions & 0 deletions test/ci_use/QwQ-32B/run_test.sh
@@ -0,0 +1,155 @@
#!/bin/bash

# ================== Configuration Parameters ==================

HOST="0.0.0.0"
PORT="8787"
BASE_URL="http://$HOST:$PORT"

TOTAL_ROUNDS=100
CHAT_REQUESTS_PER_ROUND=5
export CUDA_VISIBLE_DEVICES=0,1
MAX_MEMORY_MB=10240 # 10GB

# ====================================================
# assert_eq actual expected message
assert_eq() {
    local actual="$1"
    local expected="$2"
    local msg="$3"
    if [ "$actual" != "$expected" ]; then
        echo "Assertion failed: $msg" >&2
        exit 1
    fi
}

# assert_true condition message
assert_true() {
    local condition="$1"
    local msg="$2"
    if [ "$condition" != "1" ] && [ "$condition" != "true" ]; then
        echo "Assertion failed: $msg" >&2
        exit 1
    fi
}

# assert_success exit_code message
assert_success() {
    local code="$1"
    local msg="$2"
    if [ "$code" -ne 0 ]; then
        echo "Assertion failed: $msg" >&2
        exit 1
    fi
}

# curl_get_status(url, options...) → returns via global variables http_code and response_body
curl_get_status() {
    local result
    result=$(curl -s -w "%{http_code}" "$@")
    http_code="${result: -3}"
    response_body="${result%???}"
}
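# Example usage (illustrative endpoint, not one this script calls):
#   curl_get_status "$BASE_URL/v1/models"
#   echo "HTTP $http_code, body: $response_body"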

# ====================================================
# Get visible GPU IDs from CUDA_VISIBLE_DEVICES
# ====================================================

get_visible_gpu_ids() {
    local ids=()
    IFS=',' read -ra ADDR <<< "$CUDA_VISIBLE_DEVICES"
    for i in "${ADDR[@]}"; do
        if [[ "$i" =~ ^[0-9]+$ ]]; then
            ids+=("$i")
        fi
    done
    echo "${ids[@]}"
}
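# Example: with CUDA_VISIBLE_DEVICES="0,1" this prints "0 1"; non-numeric entries
# (e.g. GPU UUIDs or an empty string) are silently skipped.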

# ====================================================
# Check GPU memory usage (must not exceed MAX_MEMORY_MB)
# ====================================================

check_gpu_memory() {
    local gpu_ids
    gpu_ids=($(get_visible_gpu_ids))

    if [ ${#gpu_ids[@]} -eq 0 ]; then
        echo "Assertion failed: No valid GPU IDs in CUDA_VISIBLE_DEVICES='$CUDA_VISIBLE_DEVICES'" >&2
        exit 1
    fi

    for gpu_id in "${gpu_ids[@]}"; do
        local memory_used
        memory_used=$(nvidia-smi -i "$gpu_id" --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null) || \
            assert_success $? "Failed to query GPU $gpu_id memory usage"

        if ! [[ "$memory_used" =~ ^[0-9]+ ]]; then
            echo "Assertion failed: Invalid memory value for GPU $gpu_id: $memory_used" >&2
            exit 1
        fi

        assert_true "$(( memory_used <= MAX_MEMORY_MB ))" \
            "GPU $gpu_id memory $memory_used MB > $MAX_MEMORY_MB MB"
    done
}
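# Note: with --format=csv,noheader,nounits, nvidia-smi reports memory.used as a bare
# integer in MiB, so MAX_MEMORY_MB=10240 allows roughly 10 GiB of residual usage per visible GPU.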

# ====================================================

for round in $(seq 1 $TOTAL_ROUNDS); do
    echo "=== Round $round / $TOTAL_ROUNDS ==="

    # Step 1: Clear loaded weights
    echo "[Step 1] Clearing load weight..."
    curl_get_status -i "$BASE_URL/clear_load_weight"
    assert_eq "$http_code" "200" "/clear_load_weight failed with HTTP $http_code"

    # Step 2: Check GPU memory usage
    echo "[Step 2] Checking GPU memory..."
    check_gpu_memory

    # Step 3: Update model weights
    echo "[Step 3] Updating model weight..."
    curl_get_status -i "$BASE_URL/update_model_weight"
    assert_eq "$http_code" "200" "/update_model_weight failed with HTTP $http_code"

    # Step 4: Send chat completion requests
    echo "[Step 4] Sending $CHAT_REQUESTS_PER_ROUND chat completions..."
    for i in $(seq 1 $CHAT_REQUESTS_PER_ROUND); do
        echo " Request $i / $CHAT_REQUESTS_PER_ROUND"
        # Send request and capture response
        response=$(curl -s -X POST "$BASE_URL/v1/chat/completions" \
            -H "Content-Type: application/json" \
            -d '{"messages": [{"role": "user", "content": "Hello!"}]}')

        # Extract the 'content' field from the response
        content=$(echo "$response" | \
            grep -o '"content":"[^"]*"' | \
            head -1 | \
            sed 's/^"content":"//' | \
            sed 's/"$//')

        if [ -z "$content" ]; then
            # Fallback: try extracting content using sed more robustly
            content=$(echo "$response" | \
                sed -n 's/.*"content":"\([^"]*\)".*/\1/p' | \
                head -1)
        fi
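        # Note: if jq is available on the CI image (not something this script assumes),
        # the grep/sed extraction above could be replaced with a single, more robust call:
        #   content=$(echo "$response" | jq -r '.choices[0].message.content // empty')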

        # Check if content is empty or null
        if [ -z "$content" ] || [ "$content" = "null" ]; then
            echo "❌ Failed: Empty or null 'content' in response" >&2
            echo "Raw response:" >&2
            echo "$response" >&2
            exit 1
        fi

        echo "✅ Received non-empty response"
        echo -e "\n---\n"
    done

echo "Round $round completed."
echo "==================================\n"
done

echo "✅ All $TOTAL_ROUNDS rounds completed successfully."