Commit 84c319a

Merge branch 'develop' into develop
2 parents 10a46a7 + 19fda4e

19 files changed: +240 -150 lines

.github/workflows/_base_test.yml (3 additions & 4 deletions)

```diff
@@ -121,9 +121,8 @@ jobs:
     # python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
     python -m pip install paddlepaddle-gpu==3.0.0.dev20250729 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

-    pip config set global.index-url http://pip.baidu.com/root/baidu/+simple/
-    pip config set install.trusted-host pip.baidu.com
-    pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+    pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
     python -m pip install ${fastdeploy_wheel_url}
     python -m pip install pytest
@@ -150,7 +149,7 @@ jobs:
     export URL=http://localhost:${FD_API_PORT}/v1/chat/completions
     export TEMPLATE=TOKEN_LOGPROB
     TEST_EXIT_CODE=0
-    python -m pytest -sv test_base_chat.py test_compare_top_logprobs.py test_logprobs.py test_params_boundary.py test_seed_usage.py test_stream.py || TEST_EXIT_CODE=1
+    python -m pytest -sv test_base_chat.py test_compare_top_logprobs.py test_logprobs.py test_params_boundary.py test_seed_usage.py test_stream.py test_evil_cases.py || TEST_EXIT_CODE=1
     curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
       -H "Content-Type: application/json" \
       -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\", \"--early-stop-config\": \"{\\\"enable_early_stop\\\":true, \\\"window_size\\\":6, \\\"threshold\\\":0.93}\"}"
```

.github/workflows/_build_linux.yml (1 addition & 3 deletions)

```diff
@@ -125,9 +125,7 @@ jobs:
       export FASTDEPLOY_VERSION="${FASTDEPLOY_VERSION}.dev${DATE_ONLY}"
     fi
     python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
-    pip config set global.index-url http://pip.baidu.com/root/baidu/+simple/
-    pip config set install.trusted-host pip.baidu.com
-    pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+    pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

     python -m pip install --upgrade pip
     python -m pip install -r requirements.txt
```
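The same change recurs in every workflow file in this commit: the Baidu-internal index plus Tsinghua extra-index is collapsed to a single public index URL. A quick way to confirm what pip ends up with after that step is `pip config list`; a small sketch:

```python
# Sketch: print pip's effective configuration after the workflow step above.
# Expected to show only global.index-url pointing at the Tsinghua mirror.
import subprocess

result = subprocess.run(
    ["python", "-m", "pip", "config", "list"],
    capture_output=True,
    text=True,
    check=True,
)
print(result.stdout)
# e.g. global.index-url='https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple'
```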

.github/workflows/_logprob_test_linux.yml (2 additions & 3 deletions)

```diff
@@ -114,9 +114,8 @@ jobs:
     # python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
     python -m pip install paddlepaddle-gpu==3.0.0.dev20250729 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

-    pip config set global.index-url http://pip.baidu.com/root/baidu/+simple/
-    pip config set install.trusted-host pip.baidu.com
-    pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+    pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
     python -m pip install ${fastdeploy_wheel_url}

     wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
```

.github/workflows/_unit_test_coverage.yml (2 additions & 3 deletions)

```diff
@@ -96,9 +96,8 @@ jobs:
     # python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
     python -m pip install paddlepaddle-gpu==3.0.0.dev20250729 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

-    pip config set global.index-url http://pip.baidu.com/root/baidu/+simple/
-    pip config set install.trusted-host pip.baidu.com
-    pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+    pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+

     python -m pip install coverage
     python -m pip install diff-cover
```

docs/features/sampling.md (3 additions & 4 deletions)

````diff
@@ -98,7 +98,7 @@ curl -X POST "http://0.0.0.0:9222/v1/chat/completions" \
     {"role": "user", "content": "How old are you"}
   ],
   "top_p": 0.8,
-  "top_k": 50
+  "top_k": 20
 }'
 ```
@@ -117,7 +117,7 @@ response = client.chat.completions.create(
     ],
     stream=True,
     top_p=0.8,
-    top_k=50
+    extra_body={"top_k": 20, "min_p":0.1}
 )
 for chunk in response:
     if chunk.choices[0].delta:
@@ -159,8 +159,7 @@ response = client.chat.completions.create(
     ],
     stream=True,
     top_p=0.8,
-    top_k=20,
-    min_p=0.1
+    extra_body={"top_k": 20, "min_p":0.1}
 )
 for chunk in response:
     if chunk.choices[0].delta:
````
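Taken together, these hunks move `top_k` and `min_p` out of the SDK's keyword arguments and into `extra_body`, since the OpenAI Python client does not accept them as named parameters and only forwards extra sampling fields that way. A self-contained sketch of the resulting streaming call, assuming a FastDeploy server on 0.0.0.0:9222 as in the curl example; the model name and API key are placeholders:

```python
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:9222/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="default",  # placeholder; use the model name your server exposes
    messages=[{"role": "user", "content": "How old are you"}],
    stream=True,
    top_p=0.8,
    # top_k and min_p are not part of the OpenAI SDK signature, so they
    # are passed through extra_body -- exactly what this diff changes.
    extra_body={"top_k": 20, "min_p": 0.1},
)
for chunk in response:
    if chunk.choices[0].delta:
        print(chunk.choices[0].delta.content or "", end="", flush=True)
```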

docs/offline_inference.md (1 addition & 0 deletions)

```diff
@@ -183,6 +183,7 @@ For ```LLM``` configuration, refer to [Parameter Documentation](parameters.md).
 * min_p(float): Minimum probability relative to the maximum probability for a token to be considered (>0 filters low-probability tokens to improve quality)
 * max_tokens(int): Maximum generated tokens (input + output)
 * min_tokens(int): Minimum forced generation length
+* bad_words(list[str]): Prohibited words

 ### 2.5 fastdeploy.engine.request.RequestOutput
```
docs/zh/features/sampling.md (3 additions & 4 deletions)

````diff
@@ -98,7 +98,7 @@ curl -X POST "http://0.0.0.0:9222/v1/chat/completions" \
     {"role": "user", "content": "How old are you"}
   ],
   "top_p": 0.8,
-  "top_k": 50
+  "top_k": 20
 }'
 ```
@@ -118,7 +118,7 @@ response = client.chat.completions.create(
     ],
     stream=True,
     top_p=0.8,
-    extra_body={"top_k": 50}
+    extra_body={"top_k": 20}
 )
 for chunk in response:
     if chunk.choices[0].delta:
@@ -161,8 +161,7 @@ response = client.chat.completions.create(
     ],
     stream=True,
     top_p=0.8,
-    extra_body={"top_k": 20},
-    min_p=0.1
+    extra_body={"top_k": 20, "min_p": 0.1}
 )
 for chunk in response:
     if chunk.choices[0].delta:
````

docs/zh/offline_inference.md (1 addition & 0 deletions)

```diff
@@ -183,6 +183,7 @@ for output in outputs:
 * min_p(float): minimum probability threshold for a token to be considered, relative to the highest-probability token (set >0 to filter out low-probability tokens and improve generation quality)
 * max_tokens(int): maximum number of tokens the model may generate (input plus output)
 * min_tokens(int): minimum number of tokens the model is forced to generate, preventing overly early termination
+* bad_words(list[str]): list of words the model is forbidden to generate, preventing unwanted words from appearing

 ### 2.5 fastdeploy.engine.request.RequestOutput
```