Skip to content

Unify server-side and model-side Config (Part-5) #3081

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: develop
Choose a base branch
from
Open
364 changes: 347 additions & 17 deletions fastdeploy/config.py

Large diffs are not rendered by default.

23 changes: 13 additions & 10 deletions fastdeploy/engine/args_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,21 @@
from fastdeploy.config import (
CacheConfig,
EarlyStopConfig,
FDConfig,
GraphOptimizationConfig,
LoadConfig,
ModelConfig,
ParallelConfig,
SpeculativeConfig,
TaskOption,
)
from fastdeploy.engine.config import Config
from fastdeploy.platforms import current_platform
from fastdeploy.scheduler.config import SchedulerConfig
from fastdeploy.utils import DeprecatedOptionWarning, FlexibleArgumentParser
from fastdeploy.utils import (
DeprecatedOptionWarning,
FlexibleArgumentParser,
is_port_available,
)


def nullable_str(x: str) -> Optional[str]:
Expand Down Expand Up @@ -892,7 +896,7 @@ def create_early_stop_config(self) -> EarlyStopConfig:
early_stop_args[k] = v
return EarlyStopConfig(early_stop_args)

def create_engine_config(self) -> Config:
def create_engine_config(self) -> FDConfig:
"""
Create and return a Config object based on the current settings.
"""
Expand Down Expand Up @@ -927,16 +931,18 @@ def create_engine_config(self) -> Config:
self.tensor_parallel_size <= 1 and self.enable_custom_all_reduce
), "enable_custom_all_reduce must be used with tensor_parallel_size>1"

return Config(
model_name_or_path=self.model,
assert is_port_available(
"0.0.0.0", self.engine_worker_queue_port
), f"The parameter `engine_worker_queue_port`:{self.engine_worker_queue_port} is already in use."

return FDConfig(
model_config=model_cfg,
scheduler_config=scheduler_cfg,
tokenizer=self.tokenizer,
cache_config=cache_cfg,
load_config=load_cfg,
parallel_config=parallel_cfg,
max_model_len=self.max_model_len,
tensor_parallel_size=self.tensor_parallel_size,
max_num_seqs=self.max_num_seqs,
speculative_config=speculative_cfg,
max_num_batched_tokens=self.max_num_batched_tokens,
Expand All @@ -945,18 +951,15 @@ def create_engine_config(self) -> Config:
engine_worker_queue_port=self.engine_worker_queue_port,
limit_mm_per_prompt=self.limit_mm_per_prompt,
mm_processor_kwargs=self.mm_processor_kwargs,
# enable_mm=self.enable_mm,
reasoning_parser=self.reasoning_parser,
tool_parser=self.tool_call_parser,
splitwise_role=self.splitwise_role,
innode_prefill_ports=self.innode_prefill_ports,
max_num_partial_prefills=self.max_num_partial_prefills,
max_long_partial_prefills=self.max_long_partial_prefills,
long_prefill_token_threshold=self.long_prefill_token_threshold,
graph_optimization_config=graph_opt_cfg,
graph_opt_config=graph_opt_cfg,
guided_decoding_backend=self.guided_decoding_backend,
disable_any_whitespace=self.guided_decoding_disable_any_whitespace,
enable_logprob=self.enable_logprob,
early_stop_config=early_stop_cfg,
load_choices=self.load_choices,
)
Loading
Loading