diff --git a/src/imitater/model/chat_model.py b/src/imitater/model/chat_model.py index 02007c4..9b51f50 100644 --- a/src/imitater/model/chat_model.py +++ b/src/imitater/model/chat_model.py @@ -74,6 +74,7 @@ def _init_vllm_engine(self) -> None: trust_remote_code=True, max_model_len=self.config.maxlen, tensor_parallel_size=len(self.config.device), + dtype="float16", ) self._engine = AsyncLLMEngine.from_engine_args(engine_args)