From 160f5b259877c7bcf9be1cc0bd0749efc83035f7 Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Fri, 8 Aug 2025 13:18:14 +0800 Subject: [PATCH 01/14] Create test_generation.py --- test/entrypoints/test_generation.py | 100 ++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 test/entrypoints/test_generation.py diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py new file mode 100644 index 0000000000..0c5b5d0a87 --- /dev/null +++ b/test/entrypoints/test_generation.py @@ -0,0 +1,100 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import weakref + +import pytest + +from fastdeploy.engine.request import RequestOutput +from fastdeploy.engine.sampling_params import SamplingParams +from fastdeploy.entrypoints.llm import LLM +from fastdeploy.utils import get_random_port + +MODEL_NAME = "PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle" + +PROMPTS = [ + "Hello, my name is", + "The capital of China is", + "The future of AI is", + "人工智能是", +] + +TOKEN_IDS = [ + [0], + [0, 1], + [0, 1, 3], + [0, 2, 4, 6], +] + + +@pytest.fixture(scope="module") +def llm(): + # pytest caches the fixture so we use weakref.proxy to + # enable garbage collection + llm = LLM( + model=MODEL_NAME, + max_num_batched_tokens=4096, + tensor_parallel_size=1, + engine_worker_queue_port=get_random_port(), + ) + yield weakref.proxy(llm) + + +def assert_outputs_equal(o1: list[RequestOutput], o2: list[RequestOutput]): + assert [o.outputs for o in o1] == [o.outputs for o in o2] + + +@pytest.mark.parametrize("prompt_token_ids", TOKEN_IDS) +def test_consistency_single_prompt_tokens(llm: LLM, prompt_token_ids): + sampling_params = SamplingParams(temperature=1.0, top_p=0.0) + + output1 = llm.generate(prompts=prompt_token_ids, sampling_params=sampling_params) + + output2 = llm.generate({"prompt": "", "prompt_token_ids": prompt_token_ids}, sampling_params=sampling_params) + assert_outputs_equal(output1, output2) + + +def test_api_consistency_multi_prompt_tokens(llm: LLM): + sampling_params = SamplingParams( + temperature=1.0, + top_p=0.0, + ) + + output1 = llm.generate(prompts=TOKEN_IDS, sampling_params=sampling_params) + + output2 = llm.generate( + [{"prompt": "", "prompt_token_ids": p} for p in TOKEN_IDS], + sampling_params=sampling_params, + ) + + assert_outputs_equal(output1, output2) + + +def test_multiple_sampling_params(llm: LLM): + sampling_params = [ + SamplingParams(temperature=0.01, top_p=0.95), + SamplingParams(temperature=0.3, top_p=0.95), + SamplingParams(temperature=0.7, top_p=0.95), + SamplingParams(temperature=0.99, top_p=0.95), + ] + + # Multiple SamplingParams should be matched with each prompt + outputs = llm.generate(prompts=PROMPTS, sampling_params=sampling_params) + assert len(PROMPTS) == len(outputs) + + # Exception raised, if the size of params does not match the size of prompts + with pytest.raises(ValueError): + outputs = llm.generate(prompts=PROMPTS, sampling_params=sampling_params[:3]) + + # Single SamplingParams should be applied to every prompt + single_sampling_params = SamplingParams(temperature=0.3, top_p=0.95) + outputs = llm.generate(prompts=PROMPTS, sampling_params=single_sampling_params) + assert len(PROMPTS) == len(outputs) + + # sampling_params is None, default params should be applied + outputs = llm.generate(prompts=PROMPTS, sampling_params=None) + assert len(PROMPTS) == len(outputs) + + +if __name__ == "__main__": + pytest.main([__file__]) From 1c05330b5cd48a5ce63435992515e8e86f058b54 Mon Sep 17 00:00:00 2001 From: ltd0924 Date: Mon, 11 Aug 2025 19:12:25 +0800 Subject: [PATCH 02/14] update --- test/entrypoints/test_generation.py | 190 ++++++++++++++++------------ 1 file changed, 109 insertions(+), 81 deletions(-) diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py index 0c5b5d0a87..b12720ead7 100644 --- a/test/entrypoints/test_generation.py +++ b/test/entrypoints/test_generation.py @@ -2,99 +2,127 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import weakref +import unittest -import pytest from fastdeploy.engine.request import RequestOutput from fastdeploy.engine.sampling_params import SamplingParams from fastdeploy.entrypoints.llm import LLM from fastdeploy.utils import get_random_port -MODEL_NAME = "PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle" +MODEL_NAME = "/root/PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle" -PROMPTS = [ - "Hello, my name is", - "The capital of China is", - "The future of AI is", - "人工智能是", -] -TOKEN_IDS = [ - [0], - [0, 1], - [0, 1, 3], - [0, 2, 4, 6], -] +class TestGeneration(unittest.TestCase): + """Test case for generation functionality""" - -@pytest.fixture(scope="module") -def llm(): - # pytest caches the fixture so we use weakref.proxy to - # enable garbage collection - llm = LLM( - model=MODEL_NAME, - max_num_batched_tokens=4096, - tensor_parallel_size=1, - engine_worker_queue_port=get_random_port(), - ) - yield weakref.proxy(llm) - - -def assert_outputs_equal(o1: list[RequestOutput], o2: list[RequestOutput]): - assert [o.outputs for o in o1] == [o.outputs for o in o2] - - -@pytest.mark.parametrize("prompt_token_ids", TOKEN_IDS) -def test_consistency_single_prompt_tokens(llm: LLM, prompt_token_ids): - sampling_params = SamplingParams(temperature=1.0, top_p=0.0) - - output1 = llm.generate(prompts=prompt_token_ids, sampling_params=sampling_params) - - output2 = llm.generate({"prompt": "", "prompt_token_ids": prompt_token_ids}, sampling_params=sampling_params) - assert_outputs_equal(output1, output2) - - -def test_api_consistency_multi_prompt_tokens(llm: LLM): - sampling_params = SamplingParams( - temperature=1.0, - top_p=0.0, - ) - - output1 = llm.generate(prompts=TOKEN_IDS, sampling_params=sampling_params) - - output2 = llm.generate( - [{"prompt": "", "prompt_token_ids": p} for p in TOKEN_IDS], - sampling_params=sampling_params, - ) - - assert_outputs_equal(output1, output2) - - -def test_multiple_sampling_params(llm: LLM): - sampling_params = [ - SamplingParams(temperature=0.01, top_p=0.95), - SamplingParams(temperature=0.3, top_p=0.95), - SamplingParams(temperature=0.7, top_p=0.95), - SamplingParams(temperature=0.99, top_p=0.95), + TOKEN_IDS = [ + [0], + [0, 1], + [0, 1, 3], + [0, 2, 4, 6], ] - # Multiple SamplingParams should be matched with each prompt - outputs = llm.generate(prompts=PROMPTS, sampling_params=sampling_params) - assert len(PROMPTS) == len(outputs) - - # Exception raised, if the size of params does not match the size of prompts - with pytest.raises(ValueError): - outputs = llm.generate(prompts=PROMPTS, sampling_params=sampling_params[:3]) - - # Single SamplingParams should be applied to every prompt - single_sampling_params = SamplingParams(temperature=0.3, top_p=0.95) - outputs = llm.generate(prompts=PROMPTS, sampling_params=single_sampling_params) - assert len(PROMPTS) == len(outputs) - - # sampling_params is None, default params should be applied - outputs = llm.generate(prompts=PROMPTS, sampling_params=None) - assert len(PROMPTS) == len(outputs) + PROMPTS = [ + "Hello, my name is", + "The capital of China is", + "The future of AI is", + "人工智能是", + ] + @classmethod + def setUpClass(cls): + """Set up test environment before any tests run""" + cls.llm = weakref.proxy(LLM( + model=MODEL_NAME, + max_num_batched_tokens=4096, + tensor_parallel_size=1, + engine_worker_queue_port=get_random_port(), + )) + + @classmethod + def tearDownClass(cls): + """Clean up after all tests have run""" + if hasattr(cls, 'llm'): + del cls.llm + + + def assert_outputs_equal(self, o1: list[RequestOutput], o2: list[RequestOutput]): + self.assertEqual([o.outputs for o in o1], [o.outputs for o in o2]) + + def test_consistency_single_prompt_tokens(self): + """Test consistency between different prompt input formats""" + sampling_params = SamplingParams(temperature=1.0, top_p=0.0) + + for prompt_token_ids in self.TOKEN_IDS: + with self.subTest(prompt_token_ids=prompt_token_ids): + output1 = self.llm.generate( + prompts=prompt_token_ids, + sampling_params=sampling_params + ) + output2 = self.llm.generate( + {"prompt": "", "prompt_token_ids": prompt_token_ids}, + sampling_params=sampling_params + ) + self.assert_outputs_equal(output1, output2) + + + def test_api_consistency_multi_prompt_tokens(self): + """Test consistency with multiple prompt tokens""" + sampling_params = SamplingParams( + temperature=1.0, + top_p=0.0, + ) + + output1 = self.llm.generate( + prompts=self.TOKEN_IDS, + sampling_params=sampling_params + ) + + output2 = self.llm.generate( + [{"prompt": "", "prompt_token_ids": p} for p in self.TOKEN_IDS], + sampling_params=sampling_params, + ) + + self.assert_outputs_equal(output1, output2) + + def test_multiple_sampling_params(self): + """Test multiple sampling parameters combinations""" + sampling_params = [ + SamplingParams(temperature=0.01, top_p=0.95), + SamplingParams(temperature=0.3, top_p=0.95), + SamplingParams(temperature=0.7, top_p=0.95), + SamplingParams(temperature=0.99, top_p=0.95), + ] + + # Multiple SamplingParams should be matched with each prompt + outputs = self.llm.generate( + prompts=self.PROMPTS, + sampling_params=sampling_params + ) + self.assertEqual(len(self.PROMPTS), len(outputs)) + + # Exception raised if size mismatch + with self.assertRaises(ValueError): + self.llm.generate( + prompts=self.PROMPTS, + sampling_params=sampling_params[:3] + ) + + # Single SamplingParams should be applied to every prompt + single_sampling_params = SamplingParams(temperature=0.3, top_p=0.95) + outputs = self.llm.generate( + prompts=self.PROMPTS, + sampling_params=single_sampling_params + ) + self.assertEqual(len(self.PROMPTS), len(outputs)) + + # sampling_params is None, default params should be applied + outputs = self.llm.generate( + prompts=self.PROMPTS, + sampling_params=None + ) + self.assertEqual(len(self.PROMPTS), len(outputs)) if __name__ == "__main__": - pytest.main([__file__]) + unittest.main() From ec29a7392add297b78fb0287a20a817c589c651e Mon Sep 17 00:00:00 2001 From: ltd0924 Date: Mon, 11 Aug 2025 19:17:47 +0800 Subject: [PATCH 03/14] update --- test/entrypoints/test_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py index b12720ead7..61ec53882d 100644 --- a/test/entrypoints/test_generation.py +++ b/test/entrypoints/test_generation.py @@ -3,14 +3,14 @@ import weakref import unittest - +import os from fastdeploy.engine.request import RequestOutput from fastdeploy.engine.sampling_params import SamplingParams from fastdeploy.entrypoints.llm import LLM from fastdeploy.utils import get_random_port -MODEL_NAME = "/root/PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle" +MODEL_NAME = os.get_env("MODEL_PATH") + "/ERNIE-4.5-0.3B-Paddle" class TestGeneration(unittest.TestCase): From 4ca5cd99d48df19c597b18ce484a253b8aa09f3b Mon Sep 17 00:00:00 2001 From: ltd0924 Date: Mon, 11 Aug 2025 19:21:34 +0800 Subject: [PATCH 04/14] format --- test/entrypoints/test_generation.py | 58 ++++++++++------------------- 1 file changed, 20 insertions(+), 38 deletions(-) diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py index 61ec53882d..0e8b33ce6d 100644 --- a/test/entrypoints/test_generation.py +++ b/test/entrypoints/test_generation.py @@ -1,9 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import weakref -import unittest import os +import unittest +import weakref from fastdeploy.engine.request import RequestOutput from fastdeploy.engine.sampling_params import SamplingParams @@ -33,40 +33,36 @@ class TestGeneration(unittest.TestCase): @classmethod def setUpClass(cls): """Set up test environment before any tests run""" - cls.llm = weakref.proxy(LLM( - model=MODEL_NAME, - max_num_batched_tokens=4096, - tensor_parallel_size=1, - engine_worker_queue_port=get_random_port(), - )) + cls.llm = weakref.proxy( + LLM( + model=MODEL_NAME, + max_num_batched_tokens=4096, + tensor_parallel_size=1, + engine_worker_queue_port=get_random_port(), + ) + ) @classmethod def tearDownClass(cls): """Clean up after all tests have run""" - if hasattr(cls, 'llm'): + if hasattr(cls, "llm"): del cls.llm - def assert_outputs_equal(self, o1: list[RequestOutput], o2: list[RequestOutput]): self.assertEqual([o.outputs for o in o1], [o.outputs for o in o2]) def test_consistency_single_prompt_tokens(self): """Test consistency between different prompt input formats""" sampling_params = SamplingParams(temperature=1.0, top_p=0.0) - + for prompt_token_ids in self.TOKEN_IDS: with self.subTest(prompt_token_ids=prompt_token_ids): - output1 = self.llm.generate( - prompts=prompt_token_ids, - sampling_params=sampling_params - ) + output1 = self.llm.generate(prompts=prompt_token_ids, sampling_params=sampling_params) output2 = self.llm.generate( - {"prompt": "", "prompt_token_ids": prompt_token_ids}, - sampling_params=sampling_params + {"prompt": "", "prompt_token_ids": prompt_token_ids}, sampling_params=sampling_params ) self.assert_outputs_equal(output1, output2) - def test_api_consistency_multi_prompt_tokens(self): """Test consistency with multiple prompt tokens""" sampling_params = SamplingParams( @@ -74,10 +70,7 @@ def test_api_consistency_multi_prompt_tokens(self): top_p=0.0, ) - output1 = self.llm.generate( - prompts=self.TOKEN_IDS, - sampling_params=sampling_params - ) + output1 = self.llm.generate(prompts=self.TOKEN_IDS, sampling_params=sampling_params) output2 = self.llm.generate( [{"prompt": "", "prompt_token_ids": p} for p in self.TOKEN_IDS], @@ -96,33 +89,22 @@ def test_multiple_sampling_params(self): ] # Multiple SamplingParams should be matched with each prompt - outputs = self.llm.generate( - prompts=self.PROMPTS, - sampling_params=sampling_params - ) + outputs = self.llm.generate(prompts=self.PROMPTS, sampling_params=sampling_params) self.assertEqual(len(self.PROMPTS), len(outputs)) # Exception raised if size mismatch with self.assertRaises(ValueError): - self.llm.generate( - prompts=self.PROMPTS, - sampling_params=sampling_params[:3] - ) + self.llm.generate(prompts=self.PROMPTS, sampling_params=sampling_params[:3]) # Single SamplingParams should be applied to every prompt single_sampling_params = SamplingParams(temperature=0.3, top_p=0.95) - outputs = self.llm.generate( - prompts=self.PROMPTS, - sampling_params=single_sampling_params - ) + outputs = self.llm.generate(prompts=self.PROMPTS, sampling_params=single_sampling_params) self.assertEqual(len(self.PROMPTS), len(outputs)) # sampling_params is None, default params should be applied - outputs = self.llm.generate( - prompts=self.PROMPTS, - sampling_params=None - ) + outputs = self.llm.generate(prompts=self.PROMPTS, sampling_params=None) self.assertEqual(len(self.PROMPTS), len(outputs)) + if __name__ == "__main__": unittest.main() From 739bbb9f753cee40e172eddd727dba19ee8a55c4 Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Tue, 12 Aug 2025 10:41:56 +0800 Subject: [PATCH 05/14] Update test_generation.py --- test/entrypoints/test_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py index 0e8b33ce6d..ad17fb07f7 100644 --- a/test/entrypoints/test_generation.py +++ b/test/entrypoints/test_generation.py @@ -10,7 +10,7 @@ from fastdeploy.entrypoints.llm import LLM from fastdeploy.utils import get_random_port -MODEL_NAME = os.get_env("MODEL_PATH") + "/ERNIE-4.5-0.3B-Paddle" +MODEL_NAME = os.getenv("MODEL_PATH") + "/ERNIE-4.5-0.3B-Paddle" class TestGeneration(unittest.TestCase): From 880c4cc1bead836dfed2ed09f5734da1c0dea2fd Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Tue, 12 Aug 2025 21:53:18 +0800 Subject: [PATCH 06/14] Update test_generation.py --- test/entrypoints/test_generation.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py index ad17fb07f7..994d5d37eb 100644 --- a/test/entrypoints/test_generation.py +++ b/test/entrypoints/test_generation.py @@ -10,7 +10,7 @@ from fastdeploy.entrypoints.llm import LLM from fastdeploy.utils import get_random_port -MODEL_NAME = os.getenv("MODEL_PATH") + "/ERNIE-4.5-0.3B-Paddle" +MODEL_NAME = os.getenv("MODEL_PATH") + "/ernie-45-21b-a3b-bf16-paddle" class TestGeneration(unittest.TestCase): @@ -33,14 +33,16 @@ class TestGeneration(unittest.TestCase): @classmethod def setUpClass(cls): """Set up test environment before any tests run""" - cls.llm = weakref.proxy( - LLM( - model=MODEL_NAME, - max_num_batched_tokens=4096, - tensor_parallel_size=1, - engine_worker_queue_port=get_random_port(), - ) - ) + try: + llm = LLM( + model=MODEL_NAME, + max_num_batched_tokens=4096, + tensor_parallel_size=1, + engine_worker_queue_port=os.getenv("FD_ENGINE_QUEUE_PORT"), + ) + cls.llm = weakref.proxy(llm) + except Exception as e: + return @classmethod def tearDownClass(cls): From 1100f4e250b904b6b2146fcf4d73416e8a0af8b6 Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Tue, 12 Aug 2025 21:59:16 +0800 Subject: [PATCH 07/14] Update test_generation.py --- test/entrypoints/test_generation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py index 994d5d37eb..6baee5470e 100644 --- a/test/entrypoints/test_generation.py +++ b/test/entrypoints/test_generation.py @@ -35,11 +35,11 @@ def setUpClass(cls): """Set up test environment before any tests run""" try: llm = LLM( - model=MODEL_NAME, - max_num_batched_tokens=4096, - tensor_parallel_size=1, - engine_worker_queue_port=os.getenv("FD_ENGINE_QUEUE_PORT"), - ) + model=MODEL_NAME, + max_num_batched_tokens=4096, + tensor_parallel_size=1, + engine_worker_queue_port=os.getenv("FD_ENGINE_QUEUE_PORT"), + ) cls.llm = weakref.proxy(llm) except Exception as e: return From c2ba8e4ca0bf8c434dcf118464bd34cfa5a6fc79 Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Wed, 13 Aug 2025 00:17:20 +0800 Subject: [PATCH 08/14] Update test_generation.py --- test/entrypoints/test_generation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py index 6baee5470e..982063dc44 100644 --- a/test/entrypoints/test_generation.py +++ b/test/entrypoints/test_generation.py @@ -8,7 +8,6 @@ from fastdeploy.engine.request import RequestOutput from fastdeploy.engine.sampling_params import SamplingParams from fastdeploy.entrypoints.llm import LLM -from fastdeploy.utils import get_random_port MODEL_NAME = os.getenv("MODEL_PATH") + "/ernie-45-21b-a3b-bf16-paddle" @@ -32,7 +31,6 @@ class TestGeneration(unittest.TestCase): @classmethod def setUpClass(cls): - """Set up test environment before any tests run""" try: llm = LLM( model=MODEL_NAME, @@ -42,7 +40,8 @@ def setUpClass(cls): ) cls.llm = weakref.proxy(llm) except Exception as e: - return + print(f"Setting up LLM failed: {e}") + raise unittest.SkipTest(f"LLM initialization failed: {e}") @classmethod def tearDownClass(cls): From 0ab40964a8f3e07f3eb1ade710cb516f852bf1c5 Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Wed, 13 Aug 2025 00:47:55 +0800 Subject: [PATCH 09/14] Update test_generation.py --- test/entrypoints/test_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py index 982063dc44..4fce3066eb 100644 --- a/test/entrypoints/test_generation.py +++ b/test/entrypoints/test_generation.py @@ -9,7 +9,7 @@ from fastdeploy.engine.sampling_params import SamplingParams from fastdeploy.entrypoints.llm import LLM -MODEL_NAME = os.getenv("MODEL_PATH") + "/ernie-45-21b-a3b-bf16-paddle" +MODEL_NAME = os.getenv("MODEL_PATH") + "/ernie-4_5-21b-a3b-bf16-paddle" class TestGeneration(unittest.TestCase): From feb786d0563811ed15b6dbbf847eafabd84198bd Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Wed, 13 Aug 2025 01:06:44 +0800 Subject: [PATCH 10/14] Update test_generation.py --- test/entrypoints/test_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py index 4fce3066eb..95199cd5a3 100644 --- a/test/entrypoints/test_generation.py +++ b/test/entrypoints/test_generation.py @@ -36,7 +36,7 @@ def setUpClass(cls): model=MODEL_NAME, max_num_batched_tokens=4096, tensor_parallel_size=1, - engine_worker_queue_port=os.getenv("FD_ENGINE_QUEUE_PORT"), + engine_worker_queue_port=int(os.getenv("FD_ENGINE_QUEUE_PORT")), ) cls.llm = weakref.proxy(llm) except Exception as e: From b57d01330f8f73ae6ec1223a9ad2b0df96200a01 Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Thu, 14 Aug 2025 14:01:27 +0800 Subject: [PATCH 11/14] Update test_generation.py --- test/entrypoints/test_generation.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py index 95199cd5a3..7480449204 100644 --- a/test/entrypoints/test_generation.py +++ b/test/entrypoints/test_generation.py @@ -1,5 +1,18 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" import os import unittest From 05fb6d936906bbeb697ac454949bfeb7d7058e51 Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Tue, 19 Aug 2025 13:25:21 +0800 Subject: [PATCH 12/14] Update test_generation.py --- test/entrypoints/test_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/entrypoints/test_generation.py b/test/entrypoints/test_generation.py index 7480449204..214f1017cd 100644 --- a/test/entrypoints/test_generation.py +++ b/test/entrypoints/test_generation.py @@ -22,7 +22,7 @@ from fastdeploy.engine.sampling_params import SamplingParams from fastdeploy.entrypoints.llm import LLM -MODEL_NAME = os.getenv("MODEL_PATH") + "/ernie-4_5-21b-a3b-bf16-paddle" +MODEL_NAME = os.getenv("MODEL_PATH") + "/ERNIE-4.5-0.3B-Paddle" class TestGeneration(unittest.TestCase): From 7d40020e953d8169a303827170d8d6ec9967be59 Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Tue, 19 Aug 2025 15:18:11 +0800 Subject: [PATCH 13/14] Update setup.py --- test/plugins/setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/plugins/setup.py b/test/plugins/setup.py index 92c953d61b..06038c15ea 100644 --- a/test/plugins/setup.py +++ b/test/plugins/setup.py @@ -22,6 +22,5 @@ "fastdeploy.model_register_plugins": [ "fd_add_dummy_model = fd_add_dummy_model:register", ], - "fastdeploy.model_runner_plugins": ["fd_add_dummy_model_runner = fd_add_dummy_model_runner:get_runner"], }, ) From 6039385e210351cd523d8784a25745f3c05c358a Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Tue, 19 Aug 2025 15:51:36 +0800 Subject: [PATCH 14/14] Delete test/plugins/test_model_runner_register.py --- test/plugins/test_model_runner_register.py | 35 ---------------------- 1 file changed, 35 deletions(-) delete mode 100644 test/plugins/test_model_runner_register.py diff --git a/test/plugins/test_model_runner_register.py b/test/plugins/test_model_runner_register.py deleted file mode 100644 index 85110ba626..0000000000 --- a/test/plugins/test_model_runner_register.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -from fastdeploy.plugins import load_model_runner_plugins - - -class TestModelRunnerRegistryPlugins(unittest.TestCase): - def test_model_runner_callable(self): - runner_class = load_model_runner_plugins() - device_id = 1 - - # create runner - runner = runner_class(device_id) - - # test func - res = runner.get_rank() - - self.assertEqual(res, device_id) - - -if __name__ == "__main__": - unittest.main()