Skip to content

Commit 78d7610

Browse files
committed
fix: revert vLLM engine to v0.8.4 for GME Qwen2-VL
1 parent c4ec096 commit 78d7610

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

src/emd/models/embeddings/qwen.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .. import Model
2-
from ..engines import vllm_qwen3_engin091, vllm_gme_qwen2vl_engine091
2+
from ..engines import vllm_qwen3_engin091, vllm_gme_qwen2vl_engine091, vllm_gme_qwen2vl_engine084_compat
33
from ..services import sagemaker_service,local_service,ecs_service
44
from ..frameworks import fastapi_framework
55
from ..instances import (
@@ -109,7 +109,7 @@
109109
Model.register(
110110
dict(
111111
model_id = "gme-Qwen2-VL-7B-Instruct",
112-
supported_engines=[vllm_gme_qwen2vl_engine091],
112+
supported_engines=[vllm_gme_qwen2vl_engine084_compat],
113113
supported_instances=[
114114
g5d4xlarge_instance,
115115
g5d8xlarge_instance,
@@ -130,7 +130,7 @@
130130
modelscope_model_id="Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
131131
require_huggingface_token=False,
132132
application_scenario="Multimodal RAG, image-text retrieval, visual search",
133-
description="General Multimodal Embedding model based on Qwen2-VL architecture, supporting text, image, and image-text pair inputs for unified multimodal representation learning and retrieval tasks.",
133+
description="General Multimodal Embedding model based on Qwen2-VL architecture, supporting text, image, and image-text pair inputs for unified multimodal representation learning and retrieval tasks. Uses vLLM v0.8.4 for transformers compatibility.",
134134
model_type=ModelType.EMBEDDING,
135135
model_series=GME_SERIES
136136
)

src/emd/models/engines.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,15 @@ class KtransformersEngine(OpenAICompitableEngine):
190190
"description": "VLLM engine for GME multimodal embedding models based on Qwen2-VL"
191191
})
192192

193+
# GME-compatible engine: pinned to vLLM v0.8.4 for compatibility with transformers 4.51.3
194+
vllm_gme_qwen2vl_engine084_compat = VllmEngine(**{
195+
**vllm_embedding_engine091.model_dump(),
196+
"engine_dockerfile_config": {"VERSION":"v0.8.4"},
197+
"environment_variables": "export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True",
198+
"default_cli_args": " --max_num_seq 20 --disable-log-stats --trust-remote-code --task embed --limit-mm-per-prompt image=10 --gpu_memory_utilization 0.8",
199+
"description": "VLLM engine v0.8.4 for GME multimodal embedding models with compatible transformers version"
200+
})
201+
193202

194203
vllm_qwen2vl72b_engine064 = VllmEngine(**{
195204
**vllm_engine064.model_dump(),

0 commit comments

Comments
 (0)