8
8
9
9
from test_vision_openai_server_common import *
10
10
11
- from sglang .srt .utils import kill_process_tree
12
11
from sglang .test .test_utils import (
13
12
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH ,
14
- DEFAULT_URL_FOR_TEST ,
15
- CustomTestCase ,
16
13
popen_launch_server ,
17
14
)
18
15
19
16
20
- class TestQwen2VLServer (TestOpenAIVisionServer ):
17
class TestLlava(ImageOpenAITestMixin):
    """Runs the shared image chat-completion tests against a llava-onevision server.

    NOTE(review): the actual test methods are assumed to come from
    ImageOpenAITestMixin (imported via test_vision_openai_server_common) —
    confirm against that module.
    """

    @classmethod
    def setUpClass(cls):
        # One server process is launched for the whole class; the mixin's
        # tests issue requests against it using these class attributes.
        cls.model = "lmms-lab/llava-onevision-qwen2-0.5b-ov"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
        )
        # Append the OpenAI-compatible API prefix only AFTER launching,
        # since popen_launch_server expects the bare server URL.
        cls.base_url += "/v1"
30
+
31
+
32
+ class TestQwen2VLServer (ImageOpenAITestMixin , VideoOpenAITestMixin ):
21
33
@classmethod
22
34
def setUpClass (cls ):
23
35
cls .model = "Qwen/Qwen2-VL-7B-Instruct"
@@ -37,11 +49,8 @@ def setUpClass(cls):
37
49
)
38
50
cls .base_url += "/v1"
39
51
40
- def test_video_chat_completion (self ):
41
- self ._test_video_chat_completion ()
42
-
43
52
44
- class TestQwen2_5_VLServer (TestOpenAIVisionServer ):
53
+ class TestQwen2_5_VLServer (ImageOpenAITestMixin , VideoOpenAITestMixin ):
45
54
@classmethod
46
55
def setUpClass (cls ):
47
56
cls .model = "Qwen/Qwen2.5-VL-7B-Instruct"
@@ -61,9 +70,6 @@ def setUpClass(cls):
61
70
)
62
71
cls .base_url += "/v1"
63
72
64
- def test_video_chat_completion (self ):
65
- self ._test_video_chat_completion ()
66
-
67
73
68
74
class TestVLMContextLengthIssue (CustomTestCase ):
69
75
@classmethod
@@ -137,11 +143,8 @@ def test_single_image_chat_completion(self):
137
143
# )
138
144
# cls.base_url += "/v1"
139
145
140
- # def test_video_chat_completion(self):
141
- # pass
142
146
143
-
144
- class TestMinicpmvServer (TestOpenAIVisionServer ):
147
+ class TestMinicpmvServer (ImageOpenAITestMixin ):
145
148
@classmethod
146
149
def setUpClass (cls ):
147
150
cls .model = "openbmb/MiniCPM-V-2_6"
@@ -162,7 +165,7 @@ def setUpClass(cls):
162
165
cls .base_url += "/v1"
163
166
164
167
165
- class TestInternVL2_5Server (TestOpenAIVisionServer ):
168
+ class TestInternVL2_5Server (ImageOpenAITestMixin ):
166
169
@classmethod
167
170
def setUpClass (cls ):
168
171
cls .model = "OpenGVLab/InternVL2_5-2B"
@@ -181,7 +184,7 @@ def setUpClass(cls):
181
184
cls .base_url += "/v1"
182
185
183
186
184
- class TestMinicpmoServer (TestOpenAIVisionServer ):
187
+ class TestMinicpmoServer (ImageOpenAITestMixin , AudioOpenAITestMixin ):
185
188
@classmethod
186
189
def setUpClass (cls ):
187
190
cls .model = "openbmb/MiniCPM-o-2_6"
@@ -201,12 +204,8 @@ def setUpClass(cls):
201
204
)
202
205
cls .base_url += "/v1"
203
206
204
- def test_audio_chat_completion (self ):
205
- self ._test_audio_speech_completion ()
206
- self ._test_audio_ambient_completion ()
207
-
208
207
209
- class TestMimoVLServer (TestOpenAIVisionServer ):
208
+ class TestMimoVLServer (ImageOpenAITestMixin ):
210
209
@classmethod
211
210
def setUpClass (cls ):
212
211
cls .model = "XiaomiMiMo/MiMo-VL-7B-RL"
@@ -228,6 +227,95 @@ def setUpClass(cls):
228
227
cls .base_url += "/v1"
229
228
230
229
230
class TestVILAServer(ImageOpenAITestMixin):
    """Runs the shared image chat-completion tests against an NVILA-Lite server."""

    @classmethod
    def setUpClass(cls):
        cls.model = "Efficient-Large-Model/NVILA-Lite-2B-hf-0626"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"
        # Pin an exact model revision so remote-code changes upstream
        # cannot silently break this test.
        cls.revision = "6bde1de5964b40e61c802b375fff419edc867506"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
            other_args=[
                # Model ships custom code on the Hub; required to load it.
                "--trust-remote-code",
                "--context-length=65536",
                f"--revision={cls.revision}",
                # Cap CUDA-graph batch size — presumably to bound memory
                # use in CI; TODO confirm.
                "--cuda-graph-max-bs",
                "4",
            ],
        )
        # Append the OpenAI-compatible API prefix only after launching.
        cls.base_url += "/v1"
251
+
252
+
253
class TestPhi4MMServer(ImageOpenAITestMixin, AudioOpenAITestMixin):
    """Runs the shared image and audio tests against a Phi-4-multimodal server.

    The model uses separate LoRA adapters for vision and speech; each request
    selects the matching adapter via ``lora_path`` in ``extra_body``.
    """

    @classmethod
    def setUpClass(cls):
        # Manually download LoRA adapter_config.json as it's not downloaded by the model loader by default.
        from huggingface_hub import constants, snapshot_download

        snapshot_download(
            "microsoft/Phi-4-multimodal-instruct",
            allow_patterns=["**/adapter_config.json"],
        )

        cls.model = "microsoft/Phi-4-multimodal-instruct"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"

        # Pinned revision: the --lora-paths below point into the HF cache
        # snapshot directory for exactly this revision, so the two must agree.
        revision = "33e62acdd07cd7d6635badd529aa0a3467bb9c6a"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=[
                "--trust-remote-code",
                "--mem-fraction-static",
                "0.70",
                "--disable-radix-cache",
                "--max-loras-per-batch",
                "2",
                "--revision",
                revision,
                # Register the vision and speech LoRA adapters by name;
                # requests pick one via extra_body["lora_path"].
                "--lora-paths",
                f"vision={constants.HF_HUB_CACHE}/models--microsoft--Phi-4-multimodal-instruct/snapshots/{revision}/vision-lora",
                f"speech={constants.HF_HUB_CACHE}/models--microsoft--Phi-4-multimodal-instruct/snapshots/{revision}/speech-lora",
                "--cuda-graph-max-bs",
                "4",
            ],
        )
        # Append the OpenAI-compatible API prefix only after launching.
        cls.base_url += "/v1"

    def get_vision_request_kwargs(self):
        # Route image requests through the vision LoRA; greedy sampling
        # (top_k=1) keeps outputs deterministic for assertions.
        return {
            "extra_body": {
                "lora_path": "vision",
                "top_k": 1,
                "top_p": 1.0,
            }
        }

    def get_audio_request_kwargs(self):
        # Route audio requests through the speech LoRA; greedy sampling
        # (top_k=1) keeps outputs deterministic for assertions.
        return {
            "extra_body": {
                "lora_path": "speech",
                "top_k": 1,
                "top_p": 1.0,
            }
        }

    # This _test_audio_ambient_completion test is way too complicated to pass for a small LLM
    def test_audio_ambient_completion(self):
        # Intentionally overridden to a no-op to skip the mixin's version.
        pass
312
+
313
+
231
314
if __name__ == "__main__":
    # Drop the abstract base/mixin classes from module scope before test
    # discovery, so unittest does not try to run them on their own.
    del TestOpenAIOmniServerBase
    del ImageOpenAITestMixin
    del VideoOpenAITestMixin
    del AudioOpenAITestMixin
    unittest.main()
0 commit comments