From 160f5b259877c7bcf9be1cc0bd0749efc83035f7 Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Fri, 8 Aug 2025 13:18:14 +0800
Subject: [PATCH 01/14] Create test_generation.py

---
 test/entrypoints/test_generation.py | 100 ++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 test/entrypoints/test_generation.py

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
new file mode 100644
index 0000000000..0c5b5d0a87
--- /dev/null
+++ b/test/entrypoints/test_generation.py
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import weakref
+
+import pytest
+
+from fastdeploy.engine.request import RequestOutput
+from fastdeploy.engine.sampling_params import SamplingParams
+from fastdeploy.entrypoints.llm import LLM
+from fastdeploy.utils import get_random_port
+
+MODEL_NAME = "PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle"
+
+PROMPTS = [
+    "Hello, my name is",
+    "The capital of China is",
+    "The future of AI is",
+    "人工智能是",
+]
+
+TOKEN_IDS = [
+    [0],
+    [0, 1],
+    [0, 1, 3],
+    [0, 2, 4, 6],
+]
+
+
+@pytest.fixture(scope="module")
+def llm():
+    # pytest caches the fixture so we use weakref.proxy to
+    # enable garbage collection
+    llm = LLM(
+        model=MODEL_NAME,
+        max_num_batched_tokens=4096,
+        tensor_parallel_size=1,
+        engine_worker_queue_port=get_random_port(),
+    )
+    yield weakref.proxy(llm)
+
+
+def assert_outputs_equal(o1: list[RequestOutput], o2: list[RequestOutput]):
+    assert [o.outputs for o in o1] == [o.outputs for o in o2]
+
+
+@pytest.mark.parametrize("prompt_token_ids", TOKEN_IDS)
+def test_consistency_single_prompt_tokens(llm: LLM, prompt_token_ids):
+    sampling_params = SamplingParams(temperature=1.0, top_p=0.0)
+
+    output1 = llm.generate(prompts=prompt_token_ids, sampling_params=sampling_params)
+
+    output2 = llm.generate({"prompt": "", "prompt_token_ids": prompt_token_ids}, sampling_params=sampling_params)
+    assert_outputs_equal(output1, output2)
+
+
+def test_api_consistency_multi_prompt_tokens(llm: LLM):
+    sampling_params = SamplingParams(
+        temperature=1.0,
+        top_p=0.0,
+    )
+
+    output1 = llm.generate(prompts=TOKEN_IDS, sampling_params=sampling_params)
+
+    output2 = llm.generate(
+        [{"prompt": "", "prompt_token_ids": p} for p in TOKEN_IDS],
+        sampling_params=sampling_params,
+    )
+
+    assert_outputs_equal(output1, output2)
+
+
+def test_multiple_sampling_params(llm: LLM):
+    sampling_params = [
+        SamplingParams(temperature=0.01, top_p=0.95),
+        SamplingParams(temperature=0.3, top_p=0.95),
+        SamplingParams(temperature=0.7, top_p=0.95),
+        SamplingParams(temperature=0.99, top_p=0.95),
+    ]
+
+    # Multiple SamplingParams should be matched with each prompt
+    outputs = llm.generate(prompts=PROMPTS, sampling_params=sampling_params)
+    assert len(PROMPTS) == len(outputs)
+
+    # Exception raised, if the size of params does not match the size of prompts
+    with pytest.raises(ValueError):
+        outputs = llm.generate(prompts=PROMPTS, sampling_params=sampling_params[:3])
+
+    # Single SamplingParams should be applied to every prompt
+    single_sampling_params = SamplingParams(temperature=0.3, top_p=0.95)
+    outputs = llm.generate(prompts=PROMPTS, sampling_params=single_sampling_params)
+    assert len(PROMPTS) == len(outputs)
+
+    # sampling_params is None, default params should be applied
+    outputs = llm.generate(prompts=PROMPTS, sampling_params=None)
+    assert len(PROMPTS) == len(outputs)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])

From 1c05330b5cd48a5ce63435992515e8e86f058b54 Mon Sep 17 00:00:00 2001
From: ltd0924 <ltd0924@sina.com>
Date: Mon, 11 Aug 2025 19:12:25 +0800
Subject: [PATCH 02/14] update

---
 test/entrypoints/test_generation.py | 190 ++++++++++++++++------------
 1 file changed, 109 insertions(+), 81 deletions(-)

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
index 0c5b5d0a87..b12720ead7 100644
--- a/test/entrypoints/test_generation.py
+++ b/test/entrypoints/test_generation.py
@@ -2,99 +2,127 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import weakref
+import unittest
 
-import pytest
 
 from fastdeploy.engine.request import RequestOutput
 from fastdeploy.engine.sampling_params import SamplingParams
 from fastdeploy.entrypoints.llm import LLM
 from fastdeploy.utils import get_random_port
 
-MODEL_NAME = "PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle"
+MODEL_NAME = "/root/PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle"
 
-PROMPTS = [
-    "Hello, my name is",
-    "The capital of China is",
-    "The future of AI is",
-    "人工智能是",
-]
 
-TOKEN_IDS = [
-    [0],
-    [0, 1],
-    [0, 1, 3],
-    [0, 2, 4, 6],
-]
+class TestGeneration(unittest.TestCase):
+    """Test case for generation functionality"""
 
-
-@pytest.fixture(scope="module")
-def llm():
-    # pytest caches the fixture so we use weakref.proxy to
-    # enable garbage collection
-    llm = LLM(
-        model=MODEL_NAME,
-        max_num_batched_tokens=4096,
-        tensor_parallel_size=1,
-        engine_worker_queue_port=get_random_port(),
-    )
-    yield weakref.proxy(llm)
-
-
-def assert_outputs_equal(o1: list[RequestOutput], o2: list[RequestOutput]):
-    assert [o.outputs for o in o1] == [o.outputs for o in o2]
-
-
-@pytest.mark.parametrize("prompt_token_ids", TOKEN_IDS)
-def test_consistency_single_prompt_tokens(llm: LLM, prompt_token_ids):
-    sampling_params = SamplingParams(temperature=1.0, top_p=0.0)
-
-    output1 = llm.generate(prompts=prompt_token_ids, sampling_params=sampling_params)
-
-    output2 = llm.generate({"prompt": "", "prompt_token_ids": prompt_token_ids}, sampling_params=sampling_params)
-    assert_outputs_equal(output1, output2)
-
-
-def test_api_consistency_multi_prompt_tokens(llm: LLM):
-    sampling_params = SamplingParams(
-        temperature=1.0,
-        top_p=0.0,
-    )
-
-    output1 = llm.generate(prompts=TOKEN_IDS, sampling_params=sampling_params)
-
-    output2 = llm.generate(
-        [{"prompt": "", "prompt_token_ids": p} for p in TOKEN_IDS],
-        sampling_params=sampling_params,
-    )
-
-    assert_outputs_equal(output1, output2)
-
-
-def test_multiple_sampling_params(llm: LLM):
-    sampling_params = [
-        SamplingParams(temperature=0.01, top_p=0.95),
-        SamplingParams(temperature=0.3, top_p=0.95),
-        SamplingParams(temperature=0.7, top_p=0.95),
-        SamplingParams(temperature=0.99, top_p=0.95),
+    TOKEN_IDS = [
+        [0],
+        [0, 1],
+        [0, 1, 3],
+        [0, 2, 4, 6],
     ]
 
-    # Multiple SamplingParams should be matched with each prompt
-    outputs = llm.generate(prompts=PROMPTS, sampling_params=sampling_params)
-    assert len(PROMPTS) == len(outputs)
-
-    # Exception raised, if the size of params does not match the size of prompts
-    with pytest.raises(ValueError):
-        outputs = llm.generate(prompts=PROMPTS, sampling_params=sampling_params[:3])
-
-    # Single SamplingParams should be applied to every prompt
-    single_sampling_params = SamplingParams(temperature=0.3, top_p=0.95)
-    outputs = llm.generate(prompts=PROMPTS, sampling_params=single_sampling_params)
-    assert len(PROMPTS) == len(outputs)
-
-    # sampling_params is None, default params should be applied
-    outputs = llm.generate(prompts=PROMPTS, sampling_params=None)
-    assert len(PROMPTS) == len(outputs)
+    PROMPTS = [
+        "Hello, my name is",
+        "The capital of China is",
+        "The future of AI is",
+        "人工智能是",
+    ]
 
+    @classmethod
+    def setUpClass(cls):
+        """Set up test environment before any tests run"""
+        cls.llm = weakref.proxy(LLM(
+            model=MODEL_NAME,
+            max_num_batched_tokens=4096,
+            tensor_parallel_size=1,
+            engine_worker_queue_port=get_random_port(),
+        ))
+
+    @classmethod
+    def tearDownClass(cls):
+        """Clean up after all tests have run"""
+        if hasattr(cls, 'llm'):
+            del cls.llm
+
+
+    def assert_outputs_equal(self, o1: list[RequestOutput], o2: list[RequestOutput]):
+        self.assertEqual([o.outputs for o in o1], [o.outputs for o in o2])
+
+    def test_consistency_single_prompt_tokens(self):
+        """Test consistency between different prompt input formats"""
+        sampling_params = SamplingParams(temperature=1.0, top_p=0.0)
+        
+        for prompt_token_ids in self.TOKEN_IDS:
+            with self.subTest(prompt_token_ids=prompt_token_ids):
+                output1 = self.llm.generate(
+                    prompts=prompt_token_ids, 
+                    sampling_params=sampling_params
+                )
+                output2 = self.llm.generate(
+                    {"prompt": "", "prompt_token_ids": prompt_token_ids},
+                    sampling_params=sampling_params
+                )
+                self.assert_outputs_equal(output1, output2)
+
+
+    def test_api_consistency_multi_prompt_tokens(self):
+        """Test consistency with multiple prompt tokens"""
+        sampling_params = SamplingParams(
+            temperature=1.0,
+            top_p=0.0,
+        )
+
+        output1 = self.llm.generate(
+            prompts=self.TOKEN_IDS, 
+            sampling_params=sampling_params
+        )
+
+        output2 = self.llm.generate(
+            [{"prompt": "", "prompt_token_ids": p} for p in self.TOKEN_IDS],
+            sampling_params=sampling_params,
+        )
+
+        self.assert_outputs_equal(output1, output2)
+
+    def test_multiple_sampling_params(self):
+        """Test multiple sampling parameters combinations"""
+        sampling_params = [
+            SamplingParams(temperature=0.01, top_p=0.95),
+            SamplingParams(temperature=0.3, top_p=0.95),
+            SamplingParams(temperature=0.7, top_p=0.95),
+            SamplingParams(temperature=0.99, top_p=0.95),
+        ]
+
+        # Multiple SamplingParams should be matched with each prompt
+        outputs = self.llm.generate(
+            prompts=self.PROMPTS, 
+            sampling_params=sampling_params
+        )
+        self.assertEqual(len(self.PROMPTS), len(outputs))
+
+        # Exception raised if size mismatch
+        with self.assertRaises(ValueError):
+            self.llm.generate(
+                prompts=self.PROMPTS,
+                sampling_params=sampling_params[:3]
+            )
+
+        # Single SamplingParams should be applied to every prompt
+        single_sampling_params = SamplingParams(temperature=0.3, top_p=0.95)
+        outputs = self.llm.generate(
+            prompts=self.PROMPTS,
+            sampling_params=single_sampling_params
+        )
+        self.assertEqual(len(self.PROMPTS), len(outputs))
+
+        # sampling_params is None, default params should be applied
+        outputs = self.llm.generate(
+            prompts=self.PROMPTS,
+            sampling_params=None
+        )
+        self.assertEqual(len(self.PROMPTS), len(outputs))
 
 if __name__ == "__main__":
-    pytest.main([__file__])
+    unittest.main()

From ec29a7392add297b78fb0287a20a817c589c651e Mon Sep 17 00:00:00 2001
From: ltd0924 <ltd0924@sina.com>
Date: Mon, 11 Aug 2025 19:17:47 +0800
Subject: [PATCH 03/14] update

---
 test/entrypoints/test_generation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
index b12720ead7..61ec53882d 100644
--- a/test/entrypoints/test_generation.py
+++ b/test/entrypoints/test_generation.py
@@ -3,14 +3,14 @@
 
 import weakref
 import unittest
-
+import os
 
 from fastdeploy.engine.request import RequestOutput
 from fastdeploy.engine.sampling_params import SamplingParams
 from fastdeploy.entrypoints.llm import LLM
 from fastdeploy.utils import get_random_port
 
-MODEL_NAME = "/root/PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle"
+MODEL_NAME = os.get_env("MODEL_PATH") + "/ERNIE-4.5-0.3B-Paddle"
 
 
 class TestGeneration(unittest.TestCase):

From 4ca5cd99d48df19c597b18ce484a253b8aa09f3b Mon Sep 17 00:00:00 2001
From: ltd0924 <ltd0924@sina.com>
Date: Mon, 11 Aug 2025 19:21:34 +0800
Subject: [PATCH 04/14] format

---
 test/entrypoints/test_generation.py | 58 ++++++++++-------------------
 1 file changed, 20 insertions(+), 38 deletions(-)

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
index 61ec53882d..0e8b33ce6d 100644
--- a/test/entrypoints/test_generation.py
+++ b/test/entrypoints/test_generation.py
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import weakref
-import unittest
 import os
+import unittest
+import weakref
 
 from fastdeploy.engine.request import RequestOutput
 from fastdeploy.engine.sampling_params import SamplingParams
@@ -33,40 +33,36 @@ class TestGeneration(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         """Set up test environment before any tests run"""
-        cls.llm = weakref.proxy(LLM(
-            model=MODEL_NAME,
-            max_num_batched_tokens=4096,
-            tensor_parallel_size=1,
-            engine_worker_queue_port=get_random_port(),
-        ))
+        cls.llm = weakref.proxy(
+            LLM(
+                model=MODEL_NAME,
+                max_num_batched_tokens=4096,
+                tensor_parallel_size=1,
+                engine_worker_queue_port=get_random_port(),
+            )
+        )
 
     @classmethod
     def tearDownClass(cls):
         """Clean up after all tests have run"""
-        if hasattr(cls, 'llm'):
+        if hasattr(cls, "llm"):
             del cls.llm
 
-
     def assert_outputs_equal(self, o1: list[RequestOutput], o2: list[RequestOutput]):
         self.assertEqual([o.outputs for o in o1], [o.outputs for o in o2])
 
     def test_consistency_single_prompt_tokens(self):
         """Test consistency between different prompt input formats"""
         sampling_params = SamplingParams(temperature=1.0, top_p=0.0)
-        
+
         for prompt_token_ids in self.TOKEN_IDS:
             with self.subTest(prompt_token_ids=prompt_token_ids):
-                output1 = self.llm.generate(
-                    prompts=prompt_token_ids, 
-                    sampling_params=sampling_params
-                )
+                output1 = self.llm.generate(prompts=prompt_token_ids, sampling_params=sampling_params)
                 output2 = self.llm.generate(
-                    {"prompt": "", "prompt_token_ids": prompt_token_ids},
-                    sampling_params=sampling_params
+                    {"prompt": "", "prompt_token_ids": prompt_token_ids}, sampling_params=sampling_params
                 )
                 self.assert_outputs_equal(output1, output2)
 
-
     def test_api_consistency_multi_prompt_tokens(self):
         """Test consistency with multiple prompt tokens"""
         sampling_params = SamplingParams(
@@ -74,10 +70,7 @@ def test_api_consistency_multi_prompt_tokens(self):
             top_p=0.0,
         )
 
-        output1 = self.llm.generate(
-            prompts=self.TOKEN_IDS, 
-            sampling_params=sampling_params
-        )
+        output1 = self.llm.generate(prompts=self.TOKEN_IDS, sampling_params=sampling_params)
 
         output2 = self.llm.generate(
             [{"prompt": "", "prompt_token_ids": p} for p in self.TOKEN_IDS],
@@ -96,33 +89,22 @@ def test_multiple_sampling_params(self):
         ]
 
         # Multiple SamplingParams should be matched with each prompt
-        outputs = self.llm.generate(
-            prompts=self.PROMPTS, 
-            sampling_params=sampling_params
-        )
+        outputs = self.llm.generate(prompts=self.PROMPTS, sampling_params=sampling_params)
         self.assertEqual(len(self.PROMPTS), len(outputs))
 
         # Exception raised if size mismatch
         with self.assertRaises(ValueError):
-            self.llm.generate(
-                prompts=self.PROMPTS,
-                sampling_params=sampling_params[:3]
-            )
+            self.llm.generate(prompts=self.PROMPTS, sampling_params=sampling_params[:3])
 
         # Single SamplingParams should be applied to every prompt
         single_sampling_params = SamplingParams(temperature=0.3, top_p=0.95)
-        outputs = self.llm.generate(
-            prompts=self.PROMPTS,
-            sampling_params=single_sampling_params
-        )
+        outputs = self.llm.generate(prompts=self.PROMPTS, sampling_params=single_sampling_params)
         self.assertEqual(len(self.PROMPTS), len(outputs))
 
         # sampling_params is None, default params should be applied
-        outputs = self.llm.generate(
-            prompts=self.PROMPTS,
-            sampling_params=None
-        )
+        outputs = self.llm.generate(prompts=self.PROMPTS, sampling_params=None)
         self.assertEqual(len(self.PROMPTS), len(outputs))
 
+
 if __name__ == "__main__":
     unittest.main()

From 739bbb9f753cee40e172eddd727dba19ee8a55c4 Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Tue, 12 Aug 2025 10:41:56 +0800
Subject: [PATCH 05/14] Update test_generation.py

---
 test/entrypoints/test_generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
index 0e8b33ce6d..ad17fb07f7 100644
--- a/test/entrypoints/test_generation.py
+++ b/test/entrypoints/test_generation.py
@@ -10,7 +10,7 @@
 from fastdeploy.entrypoints.llm import LLM
 from fastdeploy.utils import get_random_port
 
-MODEL_NAME = os.get_env("MODEL_PATH") + "/ERNIE-4.5-0.3B-Paddle"
+MODEL_NAME = os.getenv("MODEL_PATH") + "/ERNIE-4.5-0.3B-Paddle"
 
 
 class TestGeneration(unittest.TestCase):

From 880c4cc1bead836dfed2ed09f5734da1c0dea2fd Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Tue, 12 Aug 2025 21:53:18 +0800
Subject: [PATCH 06/14] Update test_generation.py

---
 test/entrypoints/test_generation.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
index ad17fb07f7..994d5d37eb 100644
--- a/test/entrypoints/test_generation.py
+++ b/test/entrypoints/test_generation.py
@@ -10,7 +10,7 @@
 from fastdeploy.entrypoints.llm import LLM
 from fastdeploy.utils import get_random_port
 
-MODEL_NAME = os.getenv("MODEL_PATH") + "/ERNIE-4.5-0.3B-Paddle"
+MODEL_NAME = os.getenv("MODEL_PATH") + "/ernie-45-21b-a3b-bf16-paddle"
 
 
 class TestGeneration(unittest.TestCase):
@@ -33,14 +33,16 @@ class TestGeneration(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         """Set up test environment before any tests run"""
-        cls.llm = weakref.proxy(
-            LLM(
-                model=MODEL_NAME,
-                max_num_batched_tokens=4096,
-                tensor_parallel_size=1,
-                engine_worker_queue_port=get_random_port(),
-            )
-        )
+        try:
+            llm = LLM(
+                    model=MODEL_NAME,
+                    max_num_batched_tokens=4096,
+                    tensor_parallel_size=1,
+                    engine_worker_queue_port=os.getenv("FD_ENGINE_QUEUE_PORT"),
+                )
+            cls.llm = weakref.proxy(llm)
+        except Exception as e:
+            return
 
     @classmethod
     def tearDownClass(cls):

From 1100f4e250b904b6b2146fcf4d73416e8a0af8b6 Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Tue, 12 Aug 2025 21:59:16 +0800
Subject: [PATCH 07/14] Update test_generation.py

---
 test/entrypoints/test_generation.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
index 994d5d37eb..6baee5470e 100644
--- a/test/entrypoints/test_generation.py
+++ b/test/entrypoints/test_generation.py
@@ -35,11 +35,11 @@ def setUpClass(cls):
         """Set up test environment before any tests run"""
         try:
             llm = LLM(
-                    model=MODEL_NAME,
-                    max_num_batched_tokens=4096,
-                    tensor_parallel_size=1,
-                    engine_worker_queue_port=os.getenv("FD_ENGINE_QUEUE_PORT"),
-                )
+                model=MODEL_NAME,
+                max_num_batched_tokens=4096,
+                tensor_parallel_size=1,
+                engine_worker_queue_port=os.getenv("FD_ENGINE_QUEUE_PORT"),
+            )
             cls.llm = weakref.proxy(llm)
         except Exception as e:
             return

From c2ba8e4ca0bf8c434dcf118464bd34cfa5a6fc79 Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Wed, 13 Aug 2025 00:17:20 +0800
Subject: [PATCH 08/14] Update test_generation.py

---
 test/entrypoints/test_generation.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
index 6baee5470e..982063dc44 100644
--- a/test/entrypoints/test_generation.py
+++ b/test/entrypoints/test_generation.py
@@ -8,7 +8,6 @@
 from fastdeploy.engine.request import RequestOutput
 from fastdeploy.engine.sampling_params import SamplingParams
 from fastdeploy.entrypoints.llm import LLM
-from fastdeploy.utils import get_random_port
 
 MODEL_NAME = os.getenv("MODEL_PATH") + "/ernie-45-21b-a3b-bf16-paddle"
 
@@ -32,7 +31,6 @@ class TestGeneration(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
-        """Set up test environment before any tests run"""
         try:
             llm = LLM(
                 model=MODEL_NAME,
@@ -42,7 +40,8 @@ def setUpClass(cls):
             )
             cls.llm = weakref.proxy(llm)
         except Exception as e:
-            return
+            print(f"Setting up LLM failed: {e}")
+            raise unittest.SkipTest(f"LLM initialization failed: {e}")
 
     @classmethod
     def tearDownClass(cls):

From 0ab40964a8f3e07f3eb1ade710cb516f852bf1c5 Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Wed, 13 Aug 2025 00:47:55 +0800
Subject: [PATCH 09/14] Update test_generation.py

---
 test/entrypoints/test_generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
index 982063dc44..4fce3066eb 100644
--- a/test/entrypoints/test_generation.py
+++ b/test/entrypoints/test_generation.py
@@ -9,7 +9,7 @@
 from fastdeploy.engine.sampling_params import SamplingParams
 from fastdeploy.entrypoints.llm import LLM
 
-MODEL_NAME = os.getenv("MODEL_PATH") + "/ernie-45-21b-a3b-bf16-paddle"
+MODEL_NAME = os.getenv("MODEL_PATH") + "/ernie-4_5-21b-a3b-bf16-paddle"
 
 
 class TestGeneration(unittest.TestCase):

From feb786d0563811ed15b6dbbf847eafabd84198bd Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Wed, 13 Aug 2025 01:06:44 +0800
Subject: [PATCH 10/14] Update test_generation.py

---
 test/entrypoints/test_generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
index 4fce3066eb..95199cd5a3 100644
--- a/test/entrypoints/test_generation.py
+++ b/test/entrypoints/test_generation.py
@@ -36,7 +36,7 @@ def setUpClass(cls):
                 model=MODEL_NAME,
                 max_num_batched_tokens=4096,
                 tensor_parallel_size=1,
-                engine_worker_queue_port=os.getenv("FD_ENGINE_QUEUE_PORT"),
+                engine_worker_queue_port=int(os.getenv("FD_ENGINE_QUEUE_PORT")),
             )
             cls.llm = weakref.proxy(llm)
         except Exception as e:

From b57d01330f8f73ae6ec1223a9ad2b0df96200a01 Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Thu, 14 Aug 2025 14:01:27 +0800
Subject: [PATCH 11/14] Update test_generation.py

---
 test/entrypoints/test_generation.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
index 95199cd5a3..7480449204 100644
--- a/test/entrypoints/test_generation.py
+++ b/test/entrypoints/test_generation.py
@@ -1,5 +1,18 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
 
 import os
 import unittest

From 05fb6d936906bbeb697ac454949bfeb7d7058e51 Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Tue, 19 Aug 2025 13:25:21 +0800
Subject: [PATCH 12/14] Update test_generation.py

---
 test/entrypoints/test_generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py
index 7480449204..214f1017cd 100644
--- a/test/entrypoints/test_generation.py
+++ b/test/entrypoints/test_generation.py
@@ -22,7 +22,7 @@
 from fastdeploy.engine.sampling_params import SamplingParams
 from fastdeploy.entrypoints.llm import LLM
 
-MODEL_NAME = os.getenv("MODEL_PATH") + "/ernie-4_5-21b-a3b-bf16-paddle"
+MODEL_NAME = os.getenv("MODEL_PATH") + "/ERNIE-4.5-0.3B-Paddle"
 
 
 class TestGeneration(unittest.TestCase):

From 7d40020e953d8169a303827170d8d6ec9967be59 Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Tue, 19 Aug 2025 15:18:11 +0800
Subject: [PATCH 13/14] Update setup.py

---
 test/plugins/setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/plugins/setup.py b/test/plugins/setup.py
index 92c953d61b..06038c15ea 100644
--- a/test/plugins/setup.py
+++ b/test/plugins/setup.py
@@ -22,6 +22,5 @@
         "fastdeploy.model_register_plugins": [
             "fd_add_dummy_model = fd_add_dummy_model:register",
         ],
-        "fastdeploy.model_runner_plugins": ["fd_add_dummy_model_runner = fd_add_dummy_model_runner:get_runner"],
     },
 )

From 6039385e210351cd523d8784a25745f3c05c358a Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Tue, 19 Aug 2025 15:51:36 +0800
Subject: [PATCH 14/14] Delete test/plugins/test_model_runner_register.py

---
 test/plugins/test_model_runner_register.py | 35 ----------------------
 1 file changed, 35 deletions(-)
 delete mode 100644 test/plugins/test_model_runner_register.py

diff --git a/test/plugins/test_model_runner_register.py b/test/plugins/test_model_runner_register.py
deleted file mode 100644
index 85110ba626..0000000000
--- a/test/plugins/test_model_runner_register.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-from fastdeploy.plugins import load_model_runner_plugins
-
-
-class TestModelRunnerRegistryPlugins(unittest.TestCase):
-    def test_model_runner_callable(self):
-        runner_class = load_model_runner_plugins()
-        device_id = 1
-
-        # create runner
-        runner = runner_class(device_id)
-
-        # test func
-        res = runner.get_rank()
-
-        self.assertEqual(res, device_id)
-
-
-if __name__ == "__main__":
-    unittest.main()