diff --git a/.github/workflows/_base_test.yml b/.github/workflows/_base_test.yml new file mode 100644 index 0000000000..6e62e9fb47 --- /dev/null +++ b/.github/workflows/_base_test.yml @@ -0,0 +1,162 @@ +name: Base Test +description: "Run Base Tests" + +on: + workflow_call: + inputs: + DOCKER_IMAGE: + description: "Build Images" + required: true + type: string + default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310" + FASTDEPLOY_ARCHIVE_URL: + description: "URL of the compressed FastDeploy code archive." + required: true + type: string + FASTDEPLOY_WHEEL_URL: + description: "URL of the FastDeploy Wheel." + required: true + type: string + CACHE_DIR: + description: "Cache Dir Use" + required: false + type: string + default: "" + MODEL_CACHE_DIR: + description: "Cache Dir Use" + required: false + type: string + default: "" + +jobs: + base_tests: + runs-on: [self-hosted, GPU-h20-1Cards] + steps: + - name: Code Prepare + shell: bash + env: + docker_image: ${{ inputs.DOCKER_IMAGE }} + fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }} + run: | + set -x + REPO="https://github.com/${{ github.repository }}.git" + FULL_REPO="${{ github.repository }}" + REPO_NAME="${FULL_REPO##*/}" + BASE_BRANCH="${{ github.base_ref }}" + + # Clean the repository directory before starting + docker run --rm --net=host -v $(pwd):/workspace -w /workspace \ + -e "REPO_NAME=${REPO_NAME}" \ + ${docker_image} /bin/bash -c ' + if [ -d ${REPO_NAME} ]; then + echo "Directory ${REPO_NAME} exists, removing it..." + rm -rf ${REPO_NAME}* + fi + ' + + wget -q ${fd_archive_url} + tar -xf FastDeploy.tar.gz + rm -rf FastDeploy.tar.gz + cd FastDeploy + git config --global user.name "FastDeployCI" + git config --global user.email "fastdeploy_ci@example.com" + git log -n 3 --oneline + + - name: Run FastDeploy Base Tests + shell: bash + env: + docker_image: ${{ inputs.DOCKER_IMAGE }} + fastdeploy_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }} + CACHE_DIR: ${{ inputs.CACHE_DIR }} + MODEL_CACHE_DIR: ${{ inputs.MODEL_CACHE_DIR }} + run: | + runner_name="${{ runner.name }}" + last_char="${runner_name: -1}" + + if [[ "$last_char" =~ [0-7] ]]; then + DEVICES="$last_char" + else + DEVICES="0" + fi + + FLASK_PORT=$((42068 + DEVICES * 100)) + FD_API_PORT=$((42088 + DEVICES * 100)) + FD_ENGINE_QUEUE_PORT=$((42058 + DEVICES * 100)) + FD_METRICS_PORT=$((42078 + DEVICES * 100)) + echo "Test ENV Parameter:" + echo "=========================================================" + echo "FLASK_PORT=${FLASK_PORT}" + echo "FD_API_PORT=${FD_API_PORT}" + echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" + echo "FD_METRICS_PORT=${FD_METRICS_PORT}" + echo "DEVICES=${DEVICES}" + echo "=========================================================" + + CACHE_DIR="${CACHE_DIR:-$(dirname "$(dirname "${{ github.workspace }}")")}" + echo "CACHE_DIR is set to ${CACHE_DIR}" + if [ ! -f "${CACHE_DIR}/gitconfig" ]; then + touch "${CACHE_DIR}/gitconfig" + fi + if [ ! -d "${MODEL_CACHE_DIR}" ]; then + echo "Error: MODEL_CACHE_DIR '${MODEL_CACHE_DIR}' does not exist." 
+ exit 1 + fi + + PARENT_DIR=$(dirname "$WORKSPACE") + + docker run --rm --ipc=host --pid=host --net=host \ + -v $(pwd):/workspace \ + -w /workspace \ + -e fastdeploy_wheel_url=${fastdeploy_wheel_url} \ + -e "FD_API_PORT=${FD_API_PORT}" \ + -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \ + -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \ + -e "FLASK_PORT=${FLASK_PORT}" \ + -v "${MODEL_CACHE_DIR}:/MODELDATA" \ + -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ + -v "${CACHE_DIR}/.cache:/root/.cache" \ + -v "${CACHE_DIR}/ConfigDir:/root/.config" \ + -e TZ="Asia/Shanghai" \ + --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc ' + # python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/ + python -m pip install paddlepaddle-gpu==3.0.0.dev20250729 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/ + + pip config set global.index-url http://pip.baidu.com/root/baidu/+simple/ + pip config set install.trusted-host pip.baidu.com + pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple + python -m pip install ${fastdeploy_wheel_url} + python -m pip install pytest + + wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64 + chmod +x ./llm-deploy-linux-amd64 + ./llm-deploy-linux-amd64 -python python3.10 \ + -model_name ERNIE-4.5-0.3B-Paddle \ + -model_path /MODELDATA \ + --skip install + + git config --global --add safe.directory /workspace/FastDeploy + cd FastDeploy + pushd test/ce/deploy + python3.10 deploy.py > dd.log 2>&1 & + sleep 3 + curl -X POST http://0.0.0.0:${FLASK_PORT}/start \ + -H "Content-Type: application/json" \ + -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\"}" + + curl -X POST http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90 + popd + + pushd test/ce/server + export URL=http://localhost:${FD_API_PORT}/v1/chat/completions + export TEMPLATE=TOKEN_LOGPROB + TEST_EXIT_CODE=0 + python -m pytest -sv . || TEST_EXIT_CODE=$? 
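For reference, the port arithmetic and readiness handshake used in this step can be reproduced outside the workflow. The Python sketch below is a hypothetical helper, not part of this PR: it mirrors the shell logic above, where the runner name's trailing digit selects the GPU and a disjoint port block, and it drives the same `/start` → `/wait_for_infer` calls served by `test/ce/deploy/deploy.py` introduced later in this diff. `requests` and the model path are assumptions for illustration.

```python
# Hypothetical reproduction of the CI port scheme and readiness handshake
# (illustration only; not part of the workflow or deploy.py).
import requests


def ci_ports(runner_suffix: str) -> dict:
    # A trailing digit 0-7 on the runner name selects the GPU; anything else
    # falls back to device 0, exactly like the shell logic above.
    device = int(runner_suffix) if runner_suffix.isdigit() and int(runner_suffix) <= 7 else 0
    return {
        "DEVICES": device,
        "FLASK_PORT": 42068 + device * 100,
        "FD_API_PORT": 42088 + device * 100,
        "FD_ENGINE_QUEUE_PORT": 42058 + device * 100,
        "FD_METRICS_PORT": 42078 + device * 100,
    }


def start_and_wait(flask_port: int, model_path: str, timeout: int = 90) -> None:
    base = f"http://127.0.0.1:{flask_port}"
    # Register the model with the deploy service (same payload as the curl call above).
    requests.post(f"{base}/start", json={"--model": model_path}, timeout=10)
    # Stream readiness updates until the api_server is healthy or the timeout is hit.
    with requests.post(f"{base}/wait_for_infer", params={"timeout": timeout}, stream=True) as resp:
        for line in resp.iter_lines():
            if line:
                print(line.decode("utf-8", errors="ignore"))


if __name__ == "__main__":
    ports = ci_ports("1")  # e.g. a runner name ending in "1" -> GPU 1, FLASK_PORT 42168
    start_and_wait(ports["FLASK_PORT"], "/MODELDATA/ERNIE-4.5-0.3B-Paddle")
```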
+ popd + echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env + ' + if [ -f ./FastDeploy/exit_code.env ]; then + source ./FastDeploy/exit_code.env + cat ./FastDeploy/exit_code.env >> $GITHUB_ENV + fi + echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" + exit ${TEST_EXIT_CODE} diff --git a/.github/workflows/_clone_linux.yml b/.github/workflows/_clone_linux.yml index 34ee2343ee..5efdba50cc 100644 --- a/.github/workflows/_clone_linux.yml +++ b/.github/workflows/_clone_linux.yml @@ -68,7 +68,7 @@ jobs: branch_name=${{ github.ref_name }} target_path=paddle-github-action/BRANCH/FastDeploy/${branch_name}/${commit_id} fi - wget -q --no-proxy --no-check-certificate https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddlePaddle/PaddleTest/tools/bos_tools.py + wget -O bos_tools.py -q --no-proxy --no-check-certificate https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddlePaddle/PaddleTest/tools/bos_tools.py push_file=$(realpath bos_tools.py) python -m pip install bce-python-sdk==0.9.29 ls diff --git a/.github/workflows/_logprob_test_linux.yml b/.github/workflows/_logprob_test_linux.yml index 3a6aff7de1..366beaecbb 100644 --- a/.github/workflows/_logprob_test_linux.yml +++ b/.github/workflows/_logprob_test_linux.yml @@ -70,10 +70,18 @@ jobs: DEVICES="0" fi - FLASK_PORT=$((9160 + DEVICES * 100)) - FD_API_PORT=$((9180 + DEVICES * 100)) - FD_ENGINE_QUEUE_PORT=$((9150 + DEVICES * 100)) - FD_METRICS_PORT=$((9170 + DEVICES * 100)) + FLASK_PORT=$((42068 + DEVICES * 100)) + FD_API_PORT=$((42088 + DEVICES * 100)) + FD_ENGINE_QUEUE_PORT=$((42058 + DEVICES * 100)) + FD_METRICS_PORT=$((42078 + DEVICES * 100)) + echo "Test ENV Parameter:" + echo "=========================================================" + echo "FLASK_PORT=${FLASK_PORT}" + echo "FD_API_PORT=${FD_API_PORT}" + echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" + echo "FD_METRICS_PORT=${FD_METRICS_PORT}" + echo "DEVICES=${DEVICES}" + echo "=========================================================" CACHE_DIR="${CACHE_DIR:-$(dirname "$(dirname "${{ github.workspace }}")")}" echo "CACHE_DIR is set to ${CACHE_DIR}" @@ -86,8 +94,10 @@ jobs: fi PARENT_DIR=$(dirname "$WORKSPACE") + unset http_proxy + unset https_proxy - docker run --ipc=host --pid=host --net=host \ + docker run --rm --ipc=host --pid=host --net=host \ -v $(pwd):/workspace \ -w /workspace \ -e fastdeploy_wheel_url=${fastdeploy_wheel_url} \ @@ -100,7 +110,7 @@ jobs: -v "${CACHE_DIR}/.cache:/root/.cache" \ -v "${CACHE_DIR}/ConfigDir:/root/.config" \ -e TZ="Asia/Shanghai" \ - --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -c ' + --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc ' # python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/ python -m pip install paddlepaddle-gpu==3.0.0.dev20250729 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/ @@ -124,6 +134,10 @@ jobs: -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\"}" curl -X POST http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90 + curl -s -o /dev/null -w "%{http_code}" -m 2 "http://0.0.0.0:${FD_API_PORT}/health" + curl -X POST "http://0.0.0.0:${FD_API_PORT}/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -d "{\"messages\": [{\"role\": \"user\", \"content\": \"1+1=?\"}], \"logprobs\": true}" set +e rm -rf ./baseline_output cp -r baseline/ERNIE-4.5-0.3B-Paddle ./baseline_output diff --git a/.github/workflows/pr_build_and_test.yml b/.github/workflows/pr_build_and_test.yml index 0123e5a554..7ba2e7f3ef 100644 --- 
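The logprob job now sanity-checks the freshly started server before the baseline comparison: it probes `/health` and issues one chat completion with `logprobs: true`. A minimal Python equivalent of that smoke check is sketched below; the port number and prompt are illustrative, and the response layout assumes the usual OpenAI-compatible `choices[0].logprobs` shape.

```python
# Minimal sketch of the added smoke check: a /health probe plus one chat completion
# with logprobs enabled. Port and prompt are illustrative placeholders.
import requests


def smoke_check(api_port: int) -> None:
    base = f"http://127.0.0.1:{api_port}"

    health = requests.get(f"{base}/health", timeout=2)
    print("health:", health.status_code)

    payload = {
        "messages": [{"role": "user", "content": "1+1=?"}],
        "logprobs": True,
    }
    resp = requests.post(f"{base}/v1/chat/completions", json=payload, timeout=60)
    resp.raise_for_status()
    choice = resp.json()["choices"][0]
    # Assumed OpenAI-compatible layout; the per-token logprobs live here if present.
    print(choice.get("logprobs"))


if __name__ == "__main__":
    smoke_check(42088)  # FD_API_PORT for a runner ending in 0 under the new port scheme
```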
a/.github/workflows/pr_build_and_test.yml +++ b/.github/workflows/pr_build_and_test.yml @@ -19,7 +19,7 @@ jobs: needs: clone uses: ./.github/workflows/_build_linux.yml with: - DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310 + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} COMPILE_ARCH: "89,90" WITH_NIGHTLY_BUILD: "OFF" @@ -39,7 +39,7 @@ jobs: needs: [clone,build] uses: ./.github/workflows/_unit_test_coverage.yml with: - DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310 + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} @@ -48,7 +48,7 @@ jobs: needs: [build] uses: ./.github/workflows/_logprob_test_linux.yml with: - DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310 + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate PADDLETEST_ARCHIVE_URL: "https://xly-devops.bj.bcebos.com/PaddleTest/PaddleTest.tar.gz" FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelCache" @@ -61,3 +61,13 @@ jobs: DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:fastdeploy-ciuse-cuda126 FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + + base_test: + name: Run Base Tests + needs: [clone,build] + uses: ./.github/workflows/_base_test.yml + with: + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate + FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} + FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelCache" diff --git a/test/ce/deploy/deploy.py b/test/ce/deploy/deploy.py new file mode 100644 index 0000000000..5ec7e1f22a --- /dev/null +++ b/test/ce/deploy/deploy.py @@ -0,0 +1,456 @@ +import ast +import json +import os +import re +import signal +import socket +import subprocess +import sys +import time + +import requests +import yaml +from flask import Flask, Response, jsonify, request + +app = Flask(__name__) + + +def get_base_port(): + nv_visible_devices = os.environ.get("NVIDIA_VISIBLE_DEVICES", "") + if not nv_visible_devices or nv_visible_devices.lower() == "all": + return 8000 + # 提取第一个数字 + match = re.search(r"\d+", nv_visible_devices) + if match: + return int(match.group(0)) * 100 + 8000 + return 8000 + + +# 默认参数值 +PID_FILE = "pid_port" +LOG_FILE = "server.log" +base_port = get_base_port() +FLASK_PORT = int(os.environ.get("FLASK_PORT", base_port + 1)) +FD_API_PORT = int(os.environ.get("FD_API_PORT", base_port + 2)) +FD_ENGINE_QUEUE_PORT = int(os.environ.get("FD_ENGINE_QUEUE_PORT", base_port + 3)) +FD_METRICS_PORT = int(os.environ.get("FD_METRICS_PORT", base_port + 4)) +DEFAULT_PARAMS = { + "--port": FD_API_PORT, + "--engine-worker-queue-port": FD_ENGINE_QUEUE_PORT, + "--metrics-port": FD_METRICS_PORT, + "--enable-logprob": True, +} + + +def build_command(config): + """根据配置构建启动命令""" + # 基础命令 + cmd = [ + "python", + "-m", + "fastdeploy.entrypoints.openai.api_server", + ] + + # 添加配置参数 + for key, value in config.items(): + if "--enable" in key: + if value: + 
cmd.append(key) + else: + cmd.extend([key, str(value)]) + + return cmd + + +def merge_configs(base_config, override_config): + """合并配置,优先级:override_config > base_config""" + merged = base_config.copy() + + if override_config: + for key in override_config: + merged[key] = override_config[key] + + return merged + + +def is_port_in_use(port): + """检查端口是否被占用""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return s.connect_ex(("localhost", port)) == 0 + + +def get_server_pid(): + """获取服务进程ID PORT""" + if os.path.exists(PID_FILE): + with open(PID_FILE, "r") as f: + data = yaml.safe_load(f) + return data + return None + + +def is_server_running(): + """检查服务是否正在运行""" + pid_port = get_server_pid() + if pid_port is None: + return False, {"status": "Server not running..."} + + _, port = pid_port["PID"], pid_port["PORT"] + health_check_endpoint = f"http://0.0.0.0:{port}/health" + + if os.path.exists(LOG_FILE): + with open(LOG_FILE, "r") as f: + msg = f.readlines() + result = parse_tqdm_progress(msg) + + try: + response = requests.get(health_check_endpoint, timeout=2) + return response.status_code == 200, result + except requests.exceptions.RequestException: + return False, result + + +def parse_tqdm_progress(log_lines): + """ + 解析 tqdm 风格的进度条 + """ + tqdm_pattern = re.compile( + r"(?P.+?):\s+(?P\d+)%\|(?P.+?)\|\s+(?P\d+/\d+)\s+\[(?P\d+:\d+)<(?P\d+:\d+),\s+(?P[\d\.]+it/s)\]" + ) + + for line in reversed(log_lines): + match = tqdm_pattern.search(line) + if match: + data = match.groupdict() + return { + "status": "服务启动中", + "progress": { + "percent": int(data["percent"]), + "step": data["step"], + "speed": data["speed"], + "eta": data["eta"], + "elapsed": data["elapsed"], + "bar": data["bar"].strip(), + }, + "raw_line": line.strip(), + } + return {"status": "服务启动中", "progress": {}, "raw_line": log_lines[-1] if log_lines else "server.log为空"} + + +def stop_server(signum=None, frame=None): + """停止大模型推理服务""" + pid_port = get_server_pid() + if pid_port is None: + if signum: + sys.exit(0) + return jsonify({"status": "error", "message": "Service is not running"}), 400 + + server_pid, _ = pid_port["PID"], pid_port["PORT"] + + # 清理PID文件 + if os.path.exists(PID_FILE): + os.remove(PID_FILE) + if os.path.exists("gemm_profiles.json"): + os.remove("gemm_profiles.json") + + try: + # 终止进程组(包括所有子进程) + os.killpg(os.getpgid(pid_port["PID"]), signal.SIGTERM) + except Exception as e: + print(f"Failed to stop server: {e}") + + for port in [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]: + try: + output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip() + for pid in output.splitlines(): + os.kill(int(pid), signal.SIGKILL) + print(f"Killed process on port {port}, pid={pid}") + except Exception as e: + print(f"Failed to killed process on port: {e}") + # 若log目录存在,则重命名为log_timestamp + if os.path.isdir("./log"): + os.rename("./log", "./log_{}".format(time.strftime("%Y%m%d%H%M%S"))) + + if signum: + sys.exit(0) + + return jsonify({"status": "success", "message": "Service stopped", "pid": server_pid}), 200 + + +# 捕获 SIGINT (Ctrl+C) 和 SIGTERM (kill) +signal.signal(signal.SIGINT, stop_server) +signal.signal(signal.SIGTERM, stop_server) + + +@app.route("/start", methods=["POST"]) +def start_service(): + """启动大模型推理服务""" + # 检查服务是否已在运行 + if is_server_running()[0]: + return Response( + json.dumps({"status": "error", "message": "服务已启动,无需start"}, ensure_ascii=False), + status=400, + content_type="application/json", + ) + + try: + base_config = DEFAULT_PARAMS + + override_config = 
request.get_json() or {} + + final_config = merge_configs(base_config, override_config) + + global FD_API_PORT + global FD_ENGINE_QUEUE_PORT + global FD_METRICS_PORT + FD_API_PORT = final_config["--port"] + FD_ENGINE_QUEUE_PORT = final_config["--engine-worker-queue-port"] + FD_METRICS_PORT = final_config["--metrics-port"] + + # 构建命令 + cmd = build_command(final_config) + except Exception as e: + return Response( + json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False), + status=500, + content_type="application/json", + ) + + print("cmd", cmd) + + try: + # 设置环境变量并启动进程 + env = os.environ.copy() + + with open(LOG_FILE, "w") as log: + process = subprocess.Popen(cmd, stdout=log, stderr=log, env=env, start_new_session=True) + + # 保存进程ID,port到yaml文件 + with open(PID_FILE, "w") as f: + yaml.dump({"PID": process.pid, "PORT": final_config["--port"]}, f) + + json_data = { + "status": "success", + "message": "服务启动命令已执行", + "pid": process.pid, + "config": final_config, + "log_file": LOG_FILE, + "cmd": cmd, + "port_info": { + "api_port": FD_API_PORT, + "queue_port": FD_ENGINE_QUEUE_PORT, + "metrics_port": FD_METRICS_PORT, + }, + } + + return Response(json.dumps(json_data, ensure_ascii=False), status=200, content_type="application/json") + except Exception as e: + return Response( + json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False), + status=500, + content_type="application/json", + ) + + +@app.route("/switch", methods=["POST"]) +def switch_service(): + """切换模型服务""" + # kill掉已有服务 + stop_server() + time.sleep(2) + + try: + base_config = DEFAULT_PARAMS + + override_config = request.get_json() or {} + + final_config = merge_configs(base_config, override_config) + + global FD_API_PORT + global FD_ENGINE_QUEUE_PORT + global FD_METRICS_PORT + FD_API_PORT = final_config["--port"] + FD_ENGINE_QUEUE_PORT = final_config["--engine-worker-queue-port"] + FD_METRICS_PORT = final_config["--metrics-port"] + + # 构建命令 + cmd = build_command(final_config) + except Exception as e: + return Response( + json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False), + status=500, + content_type="application/json", + ) + + print("cmd", cmd) + + try: + # 设置环境变量并启动进程 + env = os.environ.copy() + + with open(LOG_FILE, "w") as log: + process = subprocess.Popen(cmd, stdout=log, stderr=log, env=env, start_new_session=True) + + # 保存进程ID,port到yaml文件 + with open(PID_FILE, "w") as f: + yaml.dump({"PID": process.pid, "PORT": final_config["--port"]}, f) + + json_data = { + "status": "success", + "message": "服务启动命令已执行", + "pid": process.pid, + "config": final_config, + "log_file": LOG_FILE, + "cmd": cmd, + "port_info": { + "api_port": FD_API_PORT, + "queue_port": FD_ENGINE_QUEUE_PORT, + "metrics_port": FD_METRICS_PORT, + }, + } + + return Response(json.dumps(json_data, ensure_ascii=False), status=200, content_type="application/json") + except Exception as e: + return Response( + json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False), + status=500, + content_type="application/json", + ) + + +@app.route("/status", methods=["GET", "POST"]) +def service_status(): + """检查服务状态""" + health, msg = is_server_running() + + if not health: + return Response(json.dumps(msg, ensure_ascii=False), status=500, content_type="application/json") + + # 检查端口是否监听 + ports_status = { + "api_port": FD_API_PORT if is_port_in_use(FD_API_PORT) else None, + "queue_port": FD_ENGINE_QUEUE_PORT if is_port_in_use(FD_ENGINE_QUEUE_PORT) else None, + "metrics_port": FD_METRICS_PORT if is_port_in_use(FD_METRICS_PORT) 
else None, + } + + msg["status"] = "服务启动完成" + msg["ports_status"] = ports_status + + return Response(json.dumps(msg, ensure_ascii=False), status=200, content_type="application/json") + + +@app.route("/stop", methods=["POST"]) +def stop_service(): + """停止大模型推理服务""" + res, status_code = stop_server() + + return res, status_code + + +@app.route("/config", methods=["GET"]) +def get_config(): + """获取当前server配置""" + health, msg = is_server_running() + + if not health: + return Response(json.dumps(msg, ensure_ascii=False), status=500, content_type="application/json") + + if not os.path.exists("log/api_server.log"): + return Response( + json.dumps({"message": "api_server.log不存在"}, ensure_ascii=False), + status=500, + content_type="application/json", + ) + + try: + # 筛选出包含"args:"的行 + with open("log/api_server.log", "r") as f: + lines = [line for line in f.readlines() if "args:" in line] + + last_line = lines[-1] if lines else "" + + # 使用正则表达式提取JSON格式的配置 + match = re.search(r"args\s*[::]\s*(.*)", last_line) + if not match: + return Response( + json.dumps({"message": "api_server.log中没有args信息,请检查log"}, ensure_ascii=False), + status=500, + content_type="application/json", + ) + + # 尝试解析JSON + config_json = match.group(1).strip() + config_data = ast.literal_eval(config_json) + print("config_data", config_data, type(config_data)) + return Response( + json.dumps({"server_config": config_data}, ensure_ascii=False), status=200, content_type="application/json" + ) + + except Exception as e: + return Response( + json.dumps({"message": "api_server.log解析失败,请检查log", "error": str(e)}, ensure_ascii=False), + status=500, + content_type="application/json", + ) + + +@app.route("/wait_for_infer", methods=["POST"]) +def wait_for_infer(): + timeout = int(request.args.get("timeout", 120)) # 可选超时时间,默认120秒 + interval = 2 + response_interval = 10 + start_time = time.time() + next_response_time = start_time + + def generate(): + nonlocal next_response_time + while True: + health, msg = is_server_running() + now = time.time() + + elapsed = time.time() - start_time + + if health: + ports_status = { + "api_port": FD_API_PORT if is_port_in_use(FD_API_PORT) else None, + "queue_port": FD_ENGINE_QUEUE_PORT if is_port_in_use(FD_ENGINE_QUEUE_PORT) else None, + "metrics_port": FD_METRICS_PORT if is_port_in_use(FD_METRICS_PORT) else None, + } + msg["status"] = "服务启动完成" + msg["ports_status"] = ports_status + yield json.dumps(msg, ensure_ascii=False) + "\n" + break + + if elapsed >= timeout: + + def tail_file(path, lines=50): + try: + with open(path, "r", encoding="utf-8", errors="ignore") as f: + return "".join(f.readlines()[-lines:]) + except Exception as e: + return f"[无法读取 {path}]: {e}\n" + + result = f"服务启动超时,耗时:[{timeout}s]\n\n" + result += "==== server.log tail 50 ====\n" + result += tail_file("server.log") + result += "\n==== log/workerlog.0 tail 50 ====\n" + result += tail_file("log/workerlog.0") + + yield result + break + + if now >= next_response_time: + msg["status"] = f"服务启动中,耗时:[{int(elapsed)}s]" + yield json.dumps(msg, ensure_ascii=False) + "\n" + next_response_time += response_interval + + time.sleep(interval) + + return Response(generate(), status=200, content_type="text/plain") + + +if __name__ == "__main__": + print(f"FLASK_PORT: {FLASK_PORT}") + print(f"FD_API_PORT: {FD_API_PORT}") + print(f"FD_ENGINE_QUEUE_PORT: {FD_ENGINE_QUEUE_PORT}") + print(f"FD_METRICS_PORT: {FD_METRICS_PORT}") + app.run(host="0.0.0.0", port=FLASK_PORT, debug=False)
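Taken together, `deploy.py` is a small Flask control plane around `fastdeploy.entrypoints.openai.api_server`: `/start` and `/switch` launch or replace the server with merged CLI flags, `/status`, `/config` and `/wait_for_infer` report readiness, and `/stop` tears down the process group and frees the ports. A hedged end-to-end client session might look like the sketch below; the host, ports and model path are placeholders taken from the CI workflows, not fixed values.

```python
# Illustrative client session against the deploy.py control plane; the host, ports
# and model path are placeholders copied from the CI workflows, not fixed values.
import requests

FLASK = "http://127.0.0.1:42068"  # FLASK_PORT
API = "http://127.0.0.1:42088"    # FD_API_PORT (the "--port" value in DEFAULT_PARAMS)

# 1. Launch api_server; any CLI flag can be overridden through the JSON body.
requests.post(f"{FLASK}/start", json={"--model": "/MODELDATA/ERNIE-4.5-0.3B-Paddle"})

# 2. Stream readiness updates until the server reports healthy or times out.
with requests.post(f"{FLASK}/wait_for_infer", params={"timeout": 90}, stream=True) as r:
    for line in r.iter_lines():
        if line:
            print(line.decode("utf-8", errors="ignore"))

# 3. Inspect the running service and the arguments parsed from log/api_server.log.
print(requests.get(f"{FLASK}/status").json())
print(requests.get(f"{FLASK}/config").json())

# 4. Exercise the OpenAI-compatible endpoint, then tear the server down.
reply = requests.post(
    f"{API}/v1/chat/completions",
    json={"messages": [{"role": "user", "content": "hi"}]},
    timeout=60,
)
print(reply.json())
requests.post(f"{FLASK}/stop")
```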