1
- #! /bin/bash
1
+ #! /usr/ bin/env bash
2
2
DIR=" $( cd " $( dirname " ${BASH_SOURCE[0]} " ) " && pwd) "
3
- echo " $ DIR"
3
+ echo " Current directory: ${ DIR} "
4
4
5
- # 先kill一遍
6
- ps -efww | grep -E ' api_server' | grep -v grep | awk ' {print $2}' | xargs kill -9 || true
7
- ps -efww | grep -E ' 8188' | grep -v grep | awk ' {print $2}' | xargs kill -9 || true
8
- lsof -t -i :8188 | xargs kill -9 || true
5
#######################################
# Force-kill leftover server processes so a previous run cannot hold the
# service port or interfere with the next launch.
#
# Targets, all best-effort:
#   * processes whose command line contains "api_server"
#   * processes whose command line contains the service port number
#   * processes that have the service port open (reported by lsof)
#
# Globals:   none
# Arguments: none
# Outputs:   nothing on success; a missing pgrep/lsof reports on stderr
# Returns:   always 0 - cleanup must never abort the calling script
#######################################
stop_processes() {
  local port=8188
  local pids pid

  # pgrep -f matches only the command line, unlike grepping full `ps -ef`
  # rows, where the port number could also match a PID, PPID or time column
  # and take down unrelated processes. pgrep never reports itself, so no
  # `grep -v grep` is needed. sort -u makes each PID be signalled once.
  pids="$(
    {
      pgrep -f 'api_server' || true
      pgrep -f -- "${port}" || true
      lsof -t -i ":${port}" || true
    } | sort -u
  )"

  while IFS= read -r pid; do
    # Skip the blank line produced by an empty result, and never kill the
    # shell that is running this function.
    [[ -n "${pid}" && "${pid}" != "$$" ]] || continue
    # The process may already have exited between listing and signalling.
    kill -9 "${pid}" 2>/dev/null || true
  done <<< "${pids}"

  return 0
}
9
10
10
- export model_path=${MODEL_PATH} /paddle/ERNIE-4.5-21B-A3B-Paddle
11
+ echo " Clean up processes..."
12
+ stop_processes
13
+ echo " Clean up completed."
14
+
15
+ export model_path=${MODEL_PATH} /ERNIE-4.5-21B-A3B-Paddle
11
16
12
17
echo " pip install requirements"
13
18
python -m pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
14
19
echo " uninstall org"
15
20
python -m pip uninstall paddlepaddle -y
16
21
python -m pip uninstall paddle-custom-gcu -y
17
22
python -m pip install paddlepaddle==3.1.0a0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
23
+ python -m pip install --pre paddle-custom-gcu==3.0.0.dev20250801 -i https://www.paddlepaddle.org.cn/packages/nightly/gcu/
18
24
echo " build whl"
19
25
bash build.sh 1 || exit 1
20
26
21
27
unset http_proxy
22
28
unset https_proxy
23
29
unset no_proxy
24
30
25
- # 起服务
26
31
rm -rf log/*
27
32
rm -f core*
28
- # pkill -9 python #流水线不执行这个
29
- # 清空消息队列
33
+
34
+ # Empty the message queue
30
35
ipcrm --all=msg
36
+ echo " Start server..."
31
37
python -m fastdeploy.entrypoints.openai.api_server \
32
38
--model ${model_path} \
33
39
--port 8188 \
@@ -38,21 +44,40 @@ python -m fastdeploy.entrypoints.openai.api_server \
38
44
--max-num-seqs 8 \
39
45
--quantization wint4 > server.log 2>&1 &
40
46
41
- sleep 60
42
- # 探活
43
- TIMEOUT=$(( 5 * 60 ))
44
- INTERVAL=10 # 检查间隔(秒)
47
+ echo " Waiting 90 seconds..."
48
+ sleep 90
49
+
50
+ if grep -q " Failed to launch worker processes" server.log; then
51
+ echo " Failed to launch worker processes..."
52
+ stop_processes
53
+ cat server.log
54
+ cat log/workerlog.0
55
+ exit 1
56
+ fi
57
+
58
+ if grep -q " Traceback (most recent call last):" server.log; then
59
+ echo " Some errors occurred..."
60
+ stop_processes
61
+ cat server.log
62
+ cat log/workerlog.0
63
+ exit 1
64
+ fi
65
+
66
+ # Health check
67
+ TIMEOUT=$(( 11 * 60 ))
68
+ INTERVAL=30 # Check interval (seconds)
45
69
ENDPOINT=" http://0.0.0.0:8188/health"
46
- START_TIME=$( date +%s) # 记录开始时间戳
47
- echo " 开始服务健康检查,最长等待时间: ${TIMEOUT} 秒 "
70
+ START_TIME=$( date +%s) # Record the start timestamp
71
+ echo " Start the server health check, maximum waiting time: ${TIMEOUT} seconds... "
48
72
while true ; do
49
- # 计算已耗时
73
+ # Compute the time elapsed since the health check started
50
74
CURRENT_TIME=$( date +%s)
51
75
ELAPSED=$(( CURRENT_TIME - START_TIME))
52
76
53
- # 超时判断
77
+ # Give up once the elapsed time reaches the timeout
54
78
if [ $ELAPSED -ge $TIMEOUT ]; then
55
- echo -e " \n服务启动超时:经过 $(( TIMEOUT/ 60 )) 分钟服务仍未启动!"
79
+ echo -e " \nServer start timeout: After $(( TIMEOUT/ 60 )) minutes, the service still doesn't start!"
80
+ stop_processes
56
81
cat server.log
57
82
cat log/workerlog.0
58
83
exit 1
@@ -61,26 +86,27 @@ while true; do
61
86
HTTP_CODE=$( curl -s -o /dev/null -w " %{http_code}" -m 2 " $ENDPOINT " || true)
62
87
63
88
if [ " $HTTP_CODE " = " 200" ]; then
64
- echo -e " \n服务启动成功!耗时 ${ ELAPSED} 秒 "
89
+ echo -e " \nThe server was successfully launched! Totally takes $(( ELAPSED+ 90 )) seconds. "
65
90
break
66
91
else
67
92
sleep $INTERVAL
68
93
fi
69
94
done
70
95
71
96
cat server.log
97
+ echo -e " \n"
72
98
73
- # 执行服务化推理
99
+ echo " Start inference... "
74
100
python test/ci_use/GCU/run_ernie.py
75
101
exit_code=$?
76
- echo exit_code is ${exit_code}
102
+ echo -e " exit_code is ${exit_code} .\n "
77
103
78
- ps -efww | grep -E ' api_server ' | grep -v grep | awk ' {print $2} ' | xargs kill -9 || true
79
- ps -efww | grep -E ' 8188 ' | grep -v grep | awk ' {print $2} ' | xargs kill -9 || true
80
- lsof -t -i :8188 | xargs kill -9 || true
104
+ echo " Stop server... "
105
+ stop_processes
106
+ echo " Stop server done. "
81
107
82
108
if [ ${exit_code} -ne 0 ]; then
83
- echo " log/workerlog.0"
109
+ echo " Exit with error, please refer to log/workerlog.0"
84
110
cat log/workerlog.0
85
111
exit 1
86
112
fi
0 commit comments