Skip to content

Commit 509c0ba

Browse files
author
xusenlin
committed
add disable_custom_all_reduce for vllm multi-gpu inference
1 parent 016bdff commit 509c0ba

File tree

2 files changed

+4
-0
lines changed

2 files changed

+4
-0
lines changed

api/config.py

+3
Original file line number | Diff line number | Diff line change
@@ -224,6 +224,9 @@ class VLLMSetting(BaseModel):
224 224
lora_modules: Optional[str] = Field(
225 225
default=get_env("LORA_MODULES", ""),
226 226
)
227+
disable_custom_all_reduce: Optional[bool] = Field(
228+
default=get_bool_env("DISABLE_CUSTOM_ALL_REDUCE"),
229+
)
227 230
vllm_disable_log_stats: Optional[bool] = Field(
228 231
default=get_bool_env("VLLM_DISABLE_LOG_STATS", "true"),
229 232
)

api/models.py

+1
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,7 @@ def create_vllm_engine():
112 112
"max_loras",
113 113
"max_lora_rank",
114 114
"lora_extra_vocab_size",
115+
"disable_custom_all_reduce",
115 116
}
116 117

117 118
if vllm_version >= "0.4.3":

0 commit comments

Comments
 (0)