Skip to content

Commit 03f8018

Browse files
committed
[GCU] Enable CI
1 parent ec81c15 commit 03f8018

File tree

3 files changed

+66
-38
lines changed

3 files changed

+66
-38
lines changed

.github/workflows/ci_gcu.yml

Lines changed: 18 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,8 @@ concurrency:
1313

1414
jobs:
1515
CI_GCU:
16-
runs-on: [self-hosted, GCU-S60-8Card]
16+
runs-on:
17+
group: GCU
1718
steps:
1819
- name: Print current runner name
1920
run: |
@@ -28,7 +29,9 @@ jobs:
2829
REPO_NAME="${FULL_REPO##*/}"
2930
BASE_BRANCH="${{ github.base_ref }}"
3031
# Clean the repository directory before starting
31-
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
32+
docker run --rm --net=host -v $(pwd):/workspace \
33+
-v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
34+
-w /workspace \
3235
-e "REPO_NAME=${REPO_NAME}" \
3336
-e "BASE_BRANCH=${BASE_BRANCH}" \
3437
${docker_image} /bin/bash -c '
@@ -39,6 +42,7 @@ jobs:
3942
'
4043
git config --global user.name "FastDeployCI"
4144
git config --global user.email "fastdeploy_ci@example.com"
45+
source ${{ github.workspace }}/../../../proxy
4246
git clone ${REPO} ${REPO_NAME} -b ${BASE_BRANCH}
4347
cd FastDeploy
4448
if [ "${{ github.event_name }}" = "pull_request" ]; then
@@ -49,6 +53,9 @@ jobs:
4953
git checkout ${{ github.sha }}
5054
git log -n 3 --oneline
5155
fi
56+
echo "Copy models..."
57+
sudo mkdir -p ci_models && sudo cp -r /work/deps/ERNIE-4.5-21B-A3B-Paddle ci_models
58+
echo "Copy models done."
5259
5360
- name: Run CI unittest
5461
env:
@@ -70,19 +77,21 @@ jobs:
7077
echo "PARENT_DIR:$PARENT_DIR"
7178
echo "Install drivers..."
7279
cd /work/deps
73-
bash TopsRider_i3x_*_deb_amd64.run --driver --no-auto-load -y
80+
sudo bash TopsRider_i3x_*_deb_amd64.run --driver --no-auto-load -y
7481
cd -
75-
docker run --rm --network=host --ipc=host -it --privileged \
76-
-v $(pwd):/workspace -w /workspace \
77-
-v "/home:/home" \
78-
-v "/work:/work" \
79-
-e "MODEL_PATH=/work/models" \
82+
echo "Create docker..."
83+
docker run --rm --network=host --ipc=host --privileged \
84+
-v $(pwd):/workspace \
85+
-v /home:/home \
86+
-v /work:/work \
87+
-w /workspace \
88+
-e "MODEL_PATH=./ci_models" \
8089
-e "http_proxy=$(git config --global --get http.proxy)" \
8190
-e "https_proxy=$(git config --global --get https.proxy)" \
8291
-e "FD_API_PORT=${FD_API_PORT}" \
8392
-e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
8493
-e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
85-
${docker_image} /bin/bash -c "
94+
${docker_image} /bin/bash -c "
8695
git config --global --add safe.directory /workspace/FastDeploy
8796
cd FastDeploy
8897
bash scripts/run_ci_gcu.sh

scripts/run_ci_gcu.sh

Lines changed: 38 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,33 +1,38 @@
11
#!/bin/bash
22
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
3-
echo "$DIR"
3+
echo "Current directory: ${DIR}"
44

5-
#先kill一遍
6-
ps -efww | grep -E 'api_server' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
7-
ps -efww | grep -E '8188' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
8-
lsof -t -i :8188 | xargs kill -9 || true
5+
# Clean up the environment
6+
fastdeploy_python_pids=$(ps -ef | grep "python" | grep -v grep | awk '{print $2}')
7+
echo "Process to clear:"
8+
echo $fastdeploy_python_pids
9+
for in_pid in ${fastdeploy_python_pids[@]}; do
10+
kill -9 ${in_pid}
11+
done
12+
echo "Clear done."
913

10-
export model_path=${MODEL_PATH}/paddle/ERNIE-4.5-21B-A3B-Paddle
14+
export model_path=${MODEL_PATH}/ERNIE-4.5-21B-A3B-Paddle
1115

1216
echo "pip install requirements"
1317
python -m pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
1418
echo "uninstall org"
1519
python -m pip uninstall paddlepaddle -y
1620
python -m pip uninstall paddle-custom-gcu -y
1721
python -m pip install paddlepaddle==3.1.0a0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
22+
python -m pip install --pre paddle-custom-gcu==3.0.0.dev20250801 -i https://www.paddlepaddle.org.cn/packages/nightly/gcu/
1823
echo "build whl"
1924
bash build.sh 1 || exit 1
2025

2126
unset http_proxy
2227
unset https_proxy
2328
unset no_proxy
2429

25-
# 起服务
2630
rm -rf log/*
2731
rm -f core*
28-
# pkill -9 python #流水线不执行这个
29-
#清空消息队列
32+
33+
# Empty the message queue
3034
ipcrm --all=msg
35+
echo "Start server..."
3136
python -m fastdeploy.entrypoints.openai.api_server \
3237
--model ${model_path} \
3338
--port 8188 \
@@ -38,21 +43,23 @@ python -m fastdeploy.entrypoints.openai.api_server \
3843
--max-num-seqs 8 \
3944
--quantization wint4 > server.log 2>&1 &
4045

41-
sleep 60
42-
# 探活
43-
TIMEOUT=$((5 * 60))
44-
INTERVAL=10 # 检查间隔(秒)
46+
echo "Waiting 90 seconds..."
47+
sleep 90
48+
49+
# Health check
50+
TIMEOUT=$((11 * 60))
51+
INTERVAL=10 # Check interval (seconds)
4552
ENDPOINT="http://0.0.0.0:8188/health"
46-
START_TIME=$(date +%s) # 记录开始时间戳
47-
echo "开始服务健康检查,最长等待时间:${TIMEOUT}"
53+
START_TIME=$(date +%s) # Record the start timestamp
54+
echo "Start the server health check, maximum waiting time: ${TIMEOUT} seconds..."
4855
while true; do
49-
# 计算已耗时
56+
# Used to calculate the time cost
5057
CURRENT_TIME=$(date +%s)
5158
ELAPSED=$((CURRENT_TIME - START_TIME))
5259

53-
# 超时判断
60+
# Timeout
5461
if [ $ELAPSED -ge $TIMEOUT ]; then
55-
echo -e "\n服务启动超时:经过 $((TIMEOUT/60)) 分钟服务仍未启动!"
62+
echo -e "\nServer start timeout: After $((TIMEOUT/60)) minutes, the service still doesn't start!"
5663
cat server.log
5764
cat log/workerlog.0
5865
exit 1
@@ -61,26 +68,32 @@ while true; do
6168
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$ENDPOINT" || true)
6269

6370
if [ "$HTTP_CODE" = "200" ]; then
64-
echo -e "\n服务启动成功!耗时 ${ELAPSED}"
71+
echo -e "\nThe server was successfully launched! Totally takes $((ELAPSED+60)) seconds."
6572
break
6673
else
6774
sleep $INTERVAL
6875
fi
6976
done
7077

7178
cat server.log
79+
echo -e "\n"
7280

73-
# 执行服务化推理
81+
echo "Start inference..."
7482
python test/ci_use/GCU/run_ernie.py
7583
exit_code=$?
76-
echo exit_code is ${exit_code}
84+
echo -e "exit_code is ${exit_code}.\n"
7785

78-
ps -efww | grep -E 'api_server' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
79-
ps -efww | grep -E '8188' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
80-
lsof -t -i :8188 | xargs kill -9 || true
86+
echo "Stop server..."
87+
fastdeploy_python_pids=$(ps -ef | grep "python" | grep -v grep | awk '{print $2}')
88+
echo "Process to stop:"
89+
echo $fastdeploy_python_pids
90+
for in_pid in ${fastdeploy_python_pids[@]}; do
91+
kill -9 ${in_pid}
92+
done
93+
echo "Stop server done."
8194

8295
if [ ${exit_code} -ne 0 ]; then
83-
echo "log/workerlog.0"
96+
echo "Exit with error, please refer to log/workerlog.0"
8497
cat log/workerlog.0
8598
exit 1
8699
fi

test/ci_use/GCU/run_ernie.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,18 +15,24 @@
1515
import openai
1616

1717
ip = "0.0.0.0"
18-
service_http_port = "8188" # 服务配置的
18+
service_http_port = "8188"
1919
client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
2020

21-
# 非流式对话
2221
response = client.chat.completions.create(
2322
model="default",
2423
messages=[
2524
{"role": "user", "content": "The largest ocean is"},
2625
],
2726
temperature=1,
2827
top_p=0,
29-
max_tokens=64,
28+
max_tokens=256,
3029
stream=False,
3130
)
32-
print(response)
31+
print(f"response is: {response}", flush=True)
32+
33+
generate_context = response.choices[0].message.content
34+
print(f"\ngenerate_context is: {generate_context}", flush=True)
35+
36+
assert "pacific ocean" in generate_context.lower(), "The answer was incorrect!"
37+
38+
print("Test successfully!", flush=True)

0 commit comments

Comments
 (0)