Checklist / 检查清单
Bug Description / Bug 描述
首先，开启 sleep_level=1 后报错：
[rank1]: engine = GRPOVllmEngine(
[rank1]: ^^^^^^^^^^^^^^^
[rank1]: File "/app/ms-swift/swift/infer_engine/vllm_engine.py", line 203, in __init__
[rank1]: self._prepare_engine()
[rank1]: File "/app/ms-swift/swift/infer_engine/vllm_engine.py", line 226, in _prepare_engine
[rank1]: engine = llm_engine_cls.from_engine_args(self.engine_args)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/engine/llm_engine.py", line 176, in from_engine_args
[rank1]: return cls(
[rank1]: ^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/engine/llm_engine.py", line 110, in __init__
[rank1]: self.engine_core = EngineCoreClient.make_client(
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 96, in make_client
[rank1]: return InprocClient(vllm_config, executor_class, log_stats)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 269, in __init__
[rank1]: self.engine_core = EngineCore(*args, **kwargs)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/engine/core.py", line 106, in __init__
[rank1]: self.model_executor = executor_class(vllm_config)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/abstract.py", line 101, in __init__
[rank1]: self._init_executor()
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/uniproc_executor.py", line 163, in _init_executor
[rank1]: super()._init_executor()
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/uniproc_executor.py", line 48, in _init_executor
[rank1]: self.driver_worker.load_model()
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/worker/worker.py", line 364, in load_model
[rank1]: with context, set_current_vllm_config(self.vllm_config):
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/contextlib.py", line 137, in __enter__
[rank1]: return next(self.gen)
[rank1]: ^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/device_allocator/camem.py", line 241, in use_memory_pool
[rank1]: with use_memory_pool_with_allocator(self.python_malloc_callback, self.python_free_callback) as data:
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/contextlib.py", line 137, in __enter__
[rank1]: return next(self.gen)
[rank1]: ^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/device_allocator/camem.py", line 106, in use_memory_pool_with_allocator
[rank1]: new_alloc = get_pluggable_allocator(python_malloc_fn, python_free_func)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/device_allocator/camem.py", line 96, in get_pluggable_allocator
[rank1]: init_module(python_malloc_fn, python_free_func)
[rank1]: TypeError: 'NoneType' object is not callable
将 sleep_level 关闭（设为 0）后就不再报错了，是不支持开启 sleep_level 吗？
其次，开启 vllm_tensor_parallel_size=8 后报错：
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 1187, in inline_user_function_return
[rank3]: return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 3724, in inline_call
[rank3]: return tracer.inline_call()
[rank3]: ^^^^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/compilation/decorators.py", line 501, in patched_inline_call
[rank3]: return inline_call(self)
[rank3]: ^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 3903, in inline_call
[rank3]: self.run()
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 1337, in run
[rank3]: while self.step():
[rank3]: ^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 1246, in step
[rank3]: self.dispatch_table[inst.opcode](self, inst)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 819, in wrapper
[rank3]: return inner_fn(self, inst)
[rank3]: ^^^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 2931, in CALL
[rank3]: self._call(inst)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 2925, in _call
[rank3]: self.call_function(fn, args, kwargs)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 1170, in call_function
[rank3]: self.push(fn.call_function(self, args, kwargs)) # type: ignore[arg-type]
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/variables/misc.py", line 913, in call_function
[rank3]: return self.obj.call_method(tx, self.name, args, kwargs)
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/variables/misc.py", line 280, in call_method
[rank3]: unimplemented(f"non-function or method super: {inner_fn}")
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/exc.py", line 439, in unimplemented
[rank3]: raise Unsupported(msg, case_name=case_name)
[rank3]: torch._dynamo.exc.Unsupported: non-function or method super:
[rank3]: from user code:
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/models/qwen2.py", line 442, in forward
[rank3]: hidden_states, residual = layer(positions, hidden_states, residual)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/models/qwen3.py", line 220, in forward
[rank3]: hidden_states = self.self_attn(
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/models/qwen3.py", line 142, in forward
[rank3]: qkv, _ = self.qkv_proj(hidden_states)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/ops/linear.py", line 160, in forward
[rank3]: return super().forward(input)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py", line 604, in forward
[rank3]: output_parallel = self.quant_method.apply(self, input, bias)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py", line 250, in apply
[rank3]: return dispatch_unquantized_gemm()(layer, x, layer.weight, bias)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/patch/worker/patch_unquantized_gemm.py", line 52, in default_unquantized_gemm
[rank3]: return torch.ops.vllm.unquantized_gemm(x, weight, bias)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/parameter.py", line 126, in __torch_function__
[rank3]: return super().__torch_function__(func, types, args, kwargs)
这个问题该怎么解决？硬件为 8 张 64GB 的卡（64GB*8），不开启 vllm_tensor_parallel_size 会 OOM。
How to Reproduce / 如何复现
swift rlhf \
--model_type qwen3 \
--output_dir /checkpoint \
--torch_dtype bfloat16 \
--num_train_epochs 1 \
--learning_rate 1e-05 \
--report_to tensorboard \
--per_device_train_batch_size 1 \
--gradient_accumulation_steps 4 \
--max_length 1024 \
--logging_dir /home/admin/logs/tfevent \
--save_steps 10 \
--logging_steps 5 \
--log_completions True \
--use_vllm True \
--vllm_mode colocate \
--vllm_gpu_memory_utilization 0.6 \
--vllm_max_model_len 1024 \
--vllm_tensor_parallel_size 8 \
--template qwen3 \
--temperature 0.9 \
--top_p 0.85 \
--sleep_level 0 \
--reward_funcs accuracy \
--warmup_ratio 0.1 \
--dataloader_num_workers 8 \
--max_completion_length 1024 \
--num_generations 4 \
--overlong_filter True \
--lr_scheduler_type cosine \
--overwrite_cache True \
--overwrite_output_dir True \
--use_fast_tokenizer True \
--ignore_args_error True \
--deepspeed zero3 \
--include_num_input_tokens_seen False \
--move_model_batches 8 \
--rlhf_type grpo \
--tuner_type lora \
--model /rl-mnt/model/actor_model \
--dataset /rl-mnt/data
Additional Information / 补充信息
No response
Checklist / 检查清单
Bug Description / Bug 描述
首先，开启 sleep_level=1 后报错：
[rank1]: engine = GRPOVllmEngine(
[rank1]: ^^^^^^^^^^^^^^^
[rank1]: File "/app/ms-swift/swift/infer_engine/vllm_engine.py", line 203, in __init__
[rank1]: self._prepare_engine()
[rank1]: File "/app/ms-swift/swift/infer_engine/vllm_engine.py", line 226, in _prepare_engine
[rank1]: engine = llm_engine_cls.from_engine_args(self.engine_args)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/engine/llm_engine.py", line 176, in from_engine_args
[rank1]: return cls(
[rank1]: ^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/engine/llm_engine.py", line 110, in __init__
[rank1]: self.engine_core = EngineCoreClient.make_client(
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 96, in make_client
[rank1]: return InprocClient(vllm_config, executor_class, log_stats)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 269, in __init__
[rank1]: self.engine_core = EngineCore(*args, **kwargs)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/engine/core.py", line 106, in __init__
[rank1]: self.model_executor = executor_class(vllm_config)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/abstract.py", line 101, in __init__
[rank1]: self._init_executor()
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/uniproc_executor.py", line 163, in _init_executor
[rank1]: super()._init_executor()
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/uniproc_executor.py", line 48, in _init_executor
[rank1]: self.driver_worker.load_model()
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/worker/worker.py", line 364, in load_model
[rank1]: with context, set_current_vllm_config(self.vllm_config):
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/contextlib.py", line 137, in __enter__
[rank1]: return next(self.gen)
[rank1]: ^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/device_allocator/camem.py", line 241, in use_memory_pool
[rank1]: with use_memory_pool_with_allocator(self.python_malloc_callback, self.python_free_callback) as data:
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/contextlib.py", line 137, in __enter__
[rank1]: return next(self.gen)
[rank1]: ^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/device_allocator/camem.py", line 106, in use_memory_pool_with_allocator
[rank1]: new_alloc = get_pluggable_allocator(python_malloc_fn, python_free_func)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/device_allocator/camem.py", line 96, in get_pluggable_allocator
[rank1]: init_module(python_malloc_fn, python_free_func)
[rank1]: TypeError: 'NoneType' object is not callable
将 sleep_level 关闭（设为 0）后就不再报错了，是不支持开启 sleep_level 吗？
其次，开启 vllm_tensor_parallel_size=8 后报错：
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 1187, in inline_user_function_return
[rank3]: return InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 3724, in inline_call
[rank3]: return tracer.inline_call()
[rank3]: ^^^^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/compilation/decorators.py", line 501, in patched_inline_call
[rank3]: return inline_call(self)
[rank3]: ^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 3903, in inline_call
[rank3]: self.run()
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 1337, in run
[rank3]: while self.step():
[rank3]: ^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 1246, in step
[rank3]: self.dispatch_table[inst.opcode](self, inst)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 819, in wrapper
[rank3]: return inner_fn(self, inst)
[rank3]: ^^^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 2931, in CALL
[rank3]: self._call(inst)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 2925, in _call
[rank3]: self.call_function(fn, args, kwargs)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/symbolic_convert.py", line 1170, in call_function
[rank3]: self.push(fn.call_function(self, args, kwargs)) # type: ignore[arg-type]
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/variables/misc.py", line 913, in call_function
[rank3]: return self.obj.call_method(tx, self.name, args, kwargs)
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/variables/misc.py", line 280, in call_method
[rank3]: unimplemented(f"non-function or method super: {inner_fn}")
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/torch/_dynamo/exc.py", line 439, in unimplemented
[rank3]: raise Unsupported(msg, case_name=case_name)
[rank3]: torch._dynamo.exc.Unsupported: non-function or method super:
[rank3]: from user code:
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/models/qwen2.py", line 442, in forward
[rank3]: hidden_states, residual = layer(positions, hidden_states, residual)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/models/qwen3.py", line 220, in forward
[rank3]: hidden_states = self.self_attn(
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/models/qwen3.py", line 142, in forward
[rank3]: qkv, _ = self.qkv_proj(hidden_states)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/ops/linear.py", line 160, in forward
[rank3]: return super().forward(input)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py", line 604, in forward
[rank3]: output_parallel = self.quant_method.apply(self, input, bias)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py", line 250, in apply
[rank3]: return dispatch_unquantized_gemm()(layer, x, layer.weight, bias)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm_ascend/patch/worker/patch_unquantized_gemm.py", line 52, in default_unquantized_gemm
[rank3]: return torch.ops.vllm.unquantized_gemm(x, weight, bias)
[rank3]: File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/parameter.py", line 126, in __torch_function__
[rank3]: return super().__torch_function__(func, types, args, kwargs)
这个问题该怎么解决？硬件为 8 张 64GB 的卡（64GB*8），不开启 vllm_tensor_parallel_size 会 OOM。
How to Reproduce / 如何复现
swift rlhf \
--model_type qwen3 \
--output_dir /checkpoint \
--torch_dtype bfloat16 \
--num_train_epochs 1 \
--learning_rate 1e-05 \
--report_to tensorboard \
--per_device_train_batch_size 1 \
--gradient_accumulation_steps 4 \
--max_length 1024 \
--logging_dir /home/admin/logs/tfevent \
--save_steps 10 \
--logging_steps 5 \
--log_completions True \
--use_vllm True \
--vllm_mode colocate \
--vllm_gpu_memory_utilization 0.6 \
--vllm_max_model_len 1024 \
--vllm_tensor_parallel_size 8 \
--template qwen3 \
--temperature 0.9 \
--top_p 0.85 \
--sleep_level 0 \
--reward_funcs accuracy \
--warmup_ratio 0.1 \
--dataloader_num_workers 8 \
--max_completion_length 1024 \
--num_generations 4 \
--overlong_filter True \
--lr_scheduler_type cosine \
--overwrite_cache True \
--overwrite_output_dir True \
--use_fast_tokenizer True \
--ignore_args_error True \
--deepspeed zero3 \
--include_num_input_tokens_seen False \
--move_model_batches 8 \
--rlhf_type grpo \
--tuner_type lora \
--model /rl-mnt/model/actor_model \
--dataset /rl-mnt/data
Additional Information / 补充信息
No response