diff --git a/xinference/core/worker.py b/xinference/core/worker.py index 0e4b25ac3d..f87be367b5 100644 --- a/xinference/core/worker.py +++ b/xinference/core/worker.py @@ -1429,6 +1429,10 @@ async def launch_builtin_model( if isinstance(n_gpu, str) and n_gpu != "auto": raise ValueError("Currently `n_gpu` only supports `auto`.") + device = kwargs.get("device") + if device and device.lower().startswith("cpu"): + n_gpu = None + if peft_model_config is not None: if model_type in ("embedding", "rerank"): raise ValueError(