Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions model-engine/model_engine_server/common/dtos/llms/vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ class VLLMModelConfig(BaseModel):
description="Enable auto tool choice",
)

reasoning_parser: Optional[str] = Field(
None,
description="Reasoning parser (e.g. 'nemotron_v3', 'deepseek_r1')",
)

load_format: Optional[str] = Field(
None,
description="The format of the model weights to load.\n\n"
Expand Down
14 changes: 12 additions & 2 deletions model-engine/model_engine_server/inference/vllm/vllm_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,15 @@ async def init_engine(
seed=request.model_cfg.seed or 0,
gpu_memory_utilization=request.max_gpu_memory_utilization or 0.9,
)
engine_args_dict = {**default_engine_args_dict, **parsed_configs.model_dump(exclude_none=True)}
_serving_only_keys = {"reasoning_parser"}
engine_args_dict = {
k: v
for k, v in {
**default_engine_args_dict,
**parsed_configs.model_dump(exclude_none=True),
}.items()
if k not in _serving_only_keys
}
Comment thread
greptile-apps[bot] marked this conversation as resolved.
Outdated
print("vLLM engine args:", engine_args_dict, flush=True)

engine_args = AsyncEngineArgs(**engine_args_dict)
Expand Down Expand Up @@ -319,7 +327,9 @@ async def init_engine(
return_tokens_as_token_ids=False,
enable_auto_tool_choice=False,
tool_call_parser=None,
structured_outputs_config=argparse.Namespace(reasoning_parser=None),
structured_outputs_config=argparse.Namespace(
reasoning_parser=parsed_configs.reasoning_parser
),
enable_prompt_tokens_details=False,
enable_force_include_usage=False,
enable_log_outputs=False,
Expand Down