1
- #! /bin/bash
1
+ #! /usr/ bin/env bash
2
2
DIR=" $( cd " $( dirname " ${BASH_SOURCE[0]} " ) " && pwd) "
3
- echo " $ DIR"
3
+ echo " Current directory: ${ DIR} "
4
4
5
- # 先kill一遍
6
- ps -efww | grep -E ' api_server' | grep -v grep | awk ' {print $2}' | xargs kill -9 || true
7
- ps -efww | grep -E ' 8188' | grep -v grep | awk ' {print $2}' | xargs kill -9 || true
8
- lsof -t -i :8188 | xargs kill -9 || true
5
+ function stop_processes() {
6
+ fastdeploy_python_pids=$( ps -ef | grep " python" | grep -v grep | awk ' {print $2}' )
7
+ echo " Process to stop:"
8
+ echo $fastdeploy_python_pids
9
+ for in_pid in ${fastdeploy_python_pids[@]} ; do
10
+ kill -9 ${in_pid}
11
+ done
12
+ }
9
13
10
- export model_path=${MODEL_PATH} /paddle/ERNIE-4.5-21B-A3B-Paddle
14
+ echo " Clean up processes..."
15
+ stop_processes
16
+ echo " Clean up completed."
17
+
18
+ export model_path=${MODEL_PATH} /ERNIE-4.5-21B-A3B-Paddle
11
19
12
20
echo " pip install requirements"
13
21
python -m pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
14
22
echo " uninstall org"
15
23
python -m pip uninstall paddlepaddle -y
16
24
python -m pip uninstall paddle-custom-gcu -y
17
25
python -m pip install paddlepaddle==3.1.0a0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
26
+ python -m pip install --pre paddle-custom-gcu==3.0.0.dev20250801 -i https://www.paddlepaddle.org.cn/packages/nightly/gcu/
18
27
echo " build whl"
19
28
bash build.sh 1 || exit 1
20
29
21
30
unset http_proxy
22
31
unset https_proxy
23
32
unset no_proxy
24
33
25
- # 起服务
26
34
rm -rf log/*
27
35
rm -f core*
28
- # pkill -9 python #流水线不执行这个
29
- # 清空消息队列
36
+
37
+ # Empty the message queue
30
38
ipcrm --all=msg
39
+ echo " Start server..."
31
40
python -m fastdeploy.entrypoints.openai.api_server \
32
41
--model ${model_path} \
33
42
--port 8188 \
@@ -38,21 +47,40 @@ python -m fastdeploy.entrypoints.openai.api_server \
38
47
--max-num-seqs 8 \
39
48
--quantization wint4 > server.log 2>&1 &
40
49
41
- sleep 60
42
- # 探活
43
- TIMEOUT=$(( 5 * 60 ))
44
- INTERVAL=10 # 检查间隔(秒)
50
+ echo " Waiting 90 seconds..."
51
+ sleep 90
52
+
53
+ if grep -q " Failed to launch worker processes" server.log; then
54
+ echo " Failed to launch worker processes..."
55
+ stop_processes
56
+ cat server.log
57
+ cat log/workerlog.0
58
+ exit 1
59
+ fi
60
+
61
+ if grep -q " Traceback (most recent call last):" server.log; then
62
+ echo " Some errors occurred..."
63
+ stop_processes
64
+ cat server.log
65
+ cat log/workerlog.0
66
+ exit 1
67
+ fi
68
+
69
+ # Health check
70
+ TIMEOUT=$(( 11 * 60 ))
71
+ INTERVAL=30 # Check interval (seconds)
45
72
ENDPOINT=" http://0.0.0.0:8188/health"
46
- START_TIME=$( date +%s) # 记录开始时间戳
47
- echo " 开始服务健康检查,最长等待时间: ${TIMEOUT} 秒 "
73
+ START_TIME=$( date +%s) # Record the start timestamp
74
+ echo " Start the server health check, maximum waiting time: ${TIMEOUT} seconds... "
48
75
while true ; do
49
- # 计算已耗时
76
+ # Used to calculate the time cost
50
77
CURRENT_TIME=$( date +%s)
51
78
ELAPSED=$(( CURRENT_TIME - START_TIME))
52
79
53
- # 超时判断
80
+ # Timeout
54
81
if [ $ELAPSED -ge $TIMEOUT ]; then
55
- echo -e " \n服务启动超时:经过 $(( TIMEOUT/ 60 )) 分钟服务仍未启动!"
82
+ echo -e " \nServer start timeout: After $(( TIMEOUT/ 60 )) minutes, the service still doesn't start!"
83
+ stop_processes
56
84
cat server.log
57
85
cat log/workerlog.0
58
86
exit 1
@@ -61,26 +89,27 @@ while true; do
61
89
HTTP_CODE=$( curl -s -o /dev/null -w " %{http_code}" -m 2 " $ENDPOINT " || true)
62
90
63
91
if [ " $HTTP_CODE " = " 200" ]; then
64
- echo -e " \n服务启动成功!耗时 ${ ELAPSED} 秒 "
92
+ echo -e " \nThe server was successfully launched! Totally takes $(( ELAPSED+ 90 )) seconds. "
65
93
break
66
94
else
67
95
sleep $INTERVAL
68
96
fi
69
97
done
70
98
71
99
cat server.log
100
+ echo -e " \n"
72
101
73
- # 执行服务化推理
102
+ echo " Start inference... "
74
103
python test/ci_use/GCU/run_ernie.py
75
104
exit_code=$?
76
- echo exit_code is ${exit_code}
105
+ echo -e " exit_code is ${exit_code} .\n "
77
106
78
- ps -efww | grep -E ' api_server ' | grep -v grep | awk ' {print $2} ' | xargs kill -9 || true
79
- ps -efww | grep -E ' 8188 ' | grep -v grep | awk ' {print $2} ' | xargs kill -9 || true
80
- lsof -t -i :8188 | xargs kill -9 || true
107
+ echo " Stop server... "
108
+ stop_processes
109
+ echo " Stop server done. "
81
110
82
111
if [ ${exit_code} -ne 0 ]; then
83
- echo " log/workerlog.0"
112
+ echo " Exit with error, please refer to log/workerlog.0"
84
113
cat log/workerlog.0
85
114
exit 1
86
115
fi
0 commit comments