Skip to content

Commit 0a5e2f3

Browse files
committed
merge
2 parents 72930b3 + 92e8fef commit 0a5e2f3

File tree

13 files changed

+216
-58
lines changed

13 files changed

+216
-58
lines changed

src/emd/commands/deploy.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,9 @@ def deploy(
253253
else:
254254
region = get_current_region()
255255

256-
if region != LOCAL_REGION:
257-
smart_bootstrap_manager.auto_bootstrap_if_needed(region)
256+
# Only bootstrap for non-local deployments
257+
if region != LOCAL_REGION and not only_allow_local_deploy:
258+
smart_bootstrap_manager.auto_bootstrap_if_needed(region, skip_confirm)
258259

259260
if dockerfile_local_path:
260261
response = sdk_deploy(
@@ -417,7 +418,7 @@ def deploy(
417418
support_gpu_num = support_gpu_num or gpu_num
418419
default_gpus_str = ",".join([str(i) for i in range(min(gpu_num,support_gpu_num))])
419420
gpus_to_deploy = questionary.text(
420-
"input the local gpu ids to deploy the model (e.g. 0,1,2):",
421+
"Please specify the local GPU IDs for model deployment (e.g., 0,1,2):",
421422
default=f"{default_gpus_str}"
422423
).ask()
423424
os.environ['CUDA_VISIBLE_DEVICES']=gpus_to_deploy

src/emd/models/engines.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,16 @@ class KtransformersEngine(OpenAICompitableEngine):
558558
"default_cli_args": " --max_new_tokens 2048",
559559
})
560560

561+
# VLLM Engine v0.9.1 for dots.ocr
562+
vllm_dots_ocr_engine091 = VllmEngine(**{
563+
**vllm_engine064.model_dump(),
564+
"engine_dockerfile_config": {"VERSION":"v0.9.1"},
565+
"dockerfile_name": "Dockerfile_dots_ocr",
566+
"environment_variables": "export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True",
567+
"default_cli_args": " --trust-remote-code --chat-template-content-format string --gpu-memory-utilization 0.95 --max_model_len 8192 --disable-log-stats --max_num_seq 5 --enforce-eager",
568+
"description": "VLLM v0.9.1 engine for dots.ocr multilingual document parsing model with flash-attn support and eager execution for custom models"
569+
})
570+
561571
custom_engine = Engine(**{
562572
"engine_type":EngineType.CUSTOM,
563573
})

src/emd/models/model_series.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,3 +163,9 @@
163163
description="GPT-OSS (GPT Open Source Software) is OpenAI's initiative to provide open-source AI models, making advanced language models accessible to developers, researchers, and organizations for building, experimenting, and scaling generative AI applications. These models are designed to foster innovation and collaboration in the open-source AI community.",
164164
reference_link="https://openai.com/index/introducing-gpt-oss/"
165165
)
166+
167+
DOTS_OCR_SERIES = ModelSeries(
168+
model_series_name=ModelSeriesType.DOTS_OCR,
169+
description="dots.ocr is a powerful, multilingual document parser that unifies layout detection and content recognition within a single vision-language model while maintaining good reading order. Despite its compact 1.7B-parameter LLM foundation, it achieves state-of-the-art (SOTA) performance on text, tables, and reading order tasks with multilingual support for over 100 languages.",
170+
reference_link="https://github.com/rednote-hilab/dots.ocr"
171+
)

src/emd/models/utils/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,4 @@ class ModelSeriesType(ConstantBase):
236236
DEEPSEEK_v3 = "deepseek v3"
237237
BAICHUAN = "baichuan"
238238
GPTOSS = "gptoss"
239+
DOTS_OCR = "dots_ocr"

src/emd/models/vlms/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
from . import internvl
33
from . import gemma3
44
from . import mistral
5+
from . import dots_ocr

src/emd/models/vlms/dots_ocr.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from .. import Model
2+
from ..model_series import DOTS_OCR_SERIES
3+
from ..engines import vllm_dots_ocr_engine091, huggingface_llm_engine_4d41d2
4+
from ..instances import (
5+
g5dxlarge_instance,
6+
g5d2xlarge_instance,
7+
g5d4xlarge_instance,
8+
g5d8xlarge_instance,
9+
local_instance
10+
)
11+
from ..services import (
12+
sagemaker_service,
13+
sagemaker_async_service,
14+
ecs_service,
15+
local_service
16+
)
17+
from ..frameworks import fastapi_framework
18+
from emd.models.utils.constants import ModelType
19+
20+
Model.register(
21+
dict(
22+
model_id="dotsocr",
23+
model_type=ModelType.VLM,
24+
description="dots.ocr is a powerful, multilingual document parser that unifies layout detection and content recognition within a single vision-language model. Built on a compact 1.7B-parameter LLM foundation, it achieves state-of-the-art performance on text, tables, and reading order tasks with support for over 100 languages including English, Chinese, and many others.",
25+
application_scenario="multilingual document layout parsing, OCR, document understanding, table extraction, formula recognition, reading order detection",
26+
supported_engines=[vllm_dots_ocr_engine091],
27+
supported_instances=[
28+
g5dxlarge_instance, g5d2xlarge_instance, g5d4xlarge_instance, g5d8xlarge_instance, local_instance
29+
],
30+
supported_services=[
31+
sagemaker_service, sagemaker_async_service, ecs_service, local_service
32+
],
33+
supported_frameworks=[
34+
fastapi_framework
35+
],
36+
allow_china_region=True,
37+
huggingface_model_id="rednote-hilab/dots.ocr",
38+
modelscope_model_id="rednote-hilab/dots.ocr",
39+
require_huggingface_token=False,
40+
model_series=DOTS_OCR_SERIES,
41+
)
42+
)

src/emd/utils/smart_bootstrap.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def show_version_mismatch_warning(self, current_version: str, deployed_version:
121121
self.console.print() # Empty line for spacing
122122

123123

124-
def auto_bootstrap_if_needed(self, region: str) -> bool:
124+
def auto_bootstrap_if_needed(self, region: str, skip_confirm: bool = False) -> bool:
125125
"""
126126
Automatically run bootstrap if needed based on comprehensive infrastructure check
127127
Returns: True if bootstrap was run, False otherwise
@@ -145,18 +145,19 @@ def auto_bootstrap_if_needed(self, region: str) -> bool:
145145
# Infrastructure missing/incomplete OR version mismatch - ask for confirmation
146146
self.show_bootstrap_notification(current_version, deployed_version)
147147

148-
# Ask for user confirmation
149-
if deployed_version:
150-
# Update scenario
151-
confirm_msg = f"Update infrastructure from {deployed_version} to {current_version}?"
152-
else:
153-
# Initialize scenario
154-
confirm_msg = f"Initialize EMD infrastructure for version {current_version}?"
155-
156-
if not typer.confirm(confirm_msg, default=False):
157-
self.console.print("[yellow]Bootstrap cancelled. Infrastructure will not be updated.[/yellow]")
158-
self.console.print("[red]Deployment cannot proceed without compatible infrastructure.[/red]")
159-
raise typer.Exit(1)
148+
# Ask for user confirmation unless skip_confirm is True
149+
if not skip_confirm:
150+
if deployed_version:
151+
# Update scenario
152+
confirm_msg = f"Update infrastructure from {deployed_version} to {current_version}?"
153+
else:
154+
# Initialize scenario
155+
confirm_msg = f"Initialize EMD infrastructure for version {current_version}?"
156+
157+
if not typer.confirm(confirm_msg, default=False):
158+
self.console.print("[yellow]Bootstrap cancelled. Infrastructure will not be updated.[/yellow]")
159+
self.console.print("[red]Deployment cannot proceed without compatible infrastructure.[/red]")
160+
raise typer.Exit(1)
160161

161162
# User confirmed - proceed with bootstrap
162163
try:

src/pipeline/backend/comfyui/build_and_push_image.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ function build_and_push_image() {
4949
aws ecr get-login-password --region "${region}" | \
5050
docker login --username AWS --password-stdin "${ecr_repo_uri}"
5151

52+
# Update ECR policy file with current account ID
53+
sed -i "s/{{ACCOUNT_ID}}/${account}/g" "${policy_file}"
54+
5255
aws ecr set-repository-policy \
5356
--repository-name "${image_name}" \
5457
--policy-text "file://${policy_file}" \
Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,31 @@
11
{
22
"Version": "2008-10-17",
33
"Statement": [
4-
{
5-
"Sid": "new statement",
6-
"Effect": "Allow",
7-
"Principal": "*",
8-
"Action": [
9-
"ecr: CompleteLayerUpload",
10-
"ecr: InitiateLayerUpload",
11-
"ecr: ListImages",
12-
"ecr:BatchCheckLayerAvailability",
13-
"ecr:BatchGetImage",
14-
"ecr:DescribeImages",
15-
"ecr:DescribeRepositories",
16-
"ecr:GetDownloadUrlForLayer"
17-
]
18-
}
4+
{
5+
"Sid": "AllowAccountUserAccess",
6+
"Effect": "Allow",
7+
"Principal": {
8+
"AWS": "arn:aws:iam::{{ACCOUNT_ID}}:root"
9+
},
10+
"Action": [
11+
"ecr:BatchCheckLayerAvailability",
12+
"ecr:BatchGetImage",
13+
"ecr:GetDownloadUrlForLayer",
14+
"ecr:DescribeImages",
15+
"ecr:DescribeRepositories"
16+
]
17+
},
18+
{
19+
"Sid": "AllowSageMakerService",
20+
"Effect": "Allow",
21+
"Principal": {
22+
"Service": "sagemaker.amazonaws.com"
23+
},
24+
"Action": [
25+
"ecr:BatchCheckLayerAvailability",
26+
"ecr:BatchGetImage",
27+
"ecr:GetDownloadUrlForLayer"
28+
]
29+
}
1930
]
2031
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
FROM public.ecr.aws/aws-gcr-solutions/dmaa-vllm/vllm-openai:{{VERSION}} AS vllm-base
2+
3+
WORKDIR /opt/ml/code
4+
5+
COPY ./backend/vllm/requirements_dots_ocr.txt /opt/ml/code/
6+
7+
RUN python3 -m pip install -r /opt/ml/code/requirements_dots_ocr.txt
8+
9+
ENV PYTHONPATH="./emd_models:${PYTHONPATH}"
10+
11+
# see https://github.com/rednote-hilab/dots.ocr/blob/master/README.md#vllm-inference
12+
RUN sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
13+
import dotsocr.modeling_dots_ocr_vllm' `which vllm`
14+
15+
EXPOSE 8080
16+
WORKDIR /opt/ml/code
17+
18+
ENTRYPOINT ["/usr/bin/serve"]

0 commit comments

Comments
 (0)