Add disable_custom_all_reduce option for vLLM multi-GPU inference

xusenlin · xusenlin · commit 509c0ba49cc1 · 2024-06-07T23:46:40.000+08:00
diff --git a/api/config.py b/api/config.py
@@ -224,6 +224,9 @@ class VLLMSetting(BaseModel):
     lora_modules: Optional[str] = Field(
         default=get_env("LORA_MODULES", ""),
     )
+    disable_custom_all_reduce: Optional[bool] = Field(
+        default=get_bool_env("DISABLE_CUSTOM_ALL_REDUCE"),
+    )
     vllm_disable_log_stats: Optional[bool] = Field(
         default=get_bool_env("VLLM_DISABLE_LOG_STATS", "true"),
     )
diff --git a/api/models.py b/api/models.py
@@ -112,6 +112,7 @@ def create_vllm_engine():
         "max_loras",
         "max_lora_rank",
         "lora_extra_vocab_size",
+        "disable_custom_all_reduce",
     }
 
     if vllm_version >= "0.4.3":

Original file line number	Diff line number	Diff line change
`@@ -224,6 +224,9 @@ class VLLMSetting(BaseModel):`
`224`	`224`	`lora_modules: Optional[str] = Field(`
`225`	`225`	`default=get_env("LORA_MODULES", ""),`
`226`	`226`	`)`
	`227`	`+ disable_custom_all_reduce: Optional[bool] = Field(`
	`228`	`+ default=get_bool_env("DISABLE_CUSTOM_ALL_REDUCE"),`
	`229`	`+ )`
`227`	`230`	`vllm_disable_log_stats: Optional[bool] = Field(`
`228`	`231`	`default=get_bool_env("VLLM_DISABLE_LOG_STATS", "true"),`
`229`	`232`	`)`