Traceback (most recent call last):
File "/root/slime/train.py", line 106, in <module>
train(args)
File "/root/slime/train.py", line 66, in train
ray.get(rollout_manager.eval.remote(rollout_id))
File "/usr/local/lib/python3.12/dist-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 2967, in get
values, debugger_breakpoint = worker.get_objects(
^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 1015, in get_objects
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(TypeError): ray::RolloutManager.eval() (pid=533367, ip=wings-f80b67aa53c5-master-0, actor_id=e81a9cf9a8b8dfa836c19b9f02000000, repr=<slime.ray.rollout.RolloutManager object at 0x7f42adba5f10>)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/slime/slime/ray/rollout.py", line 115, in eval
result = call_rollout_fn(self.eval_generate_rollout, self.args, rollout_id, self.data_source, evaluation=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/slime/slime/rollout/base_types.py", line 20, in call_rollout_fn
output = fn(*args, **kwargs, evaluation=evaluation)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/slime/slime/rollout/sglang_rollout.py", line 581, in generate_rollout
output, aborted_samples = generate_abortable_samples(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/slime/slime/rollout/sglang_rollout.py", line 596, in generate_abortable_samples
File "/root/slime/slime/rollout/sglang_rollout.py", line 596, in generate_abortable_samples
return run(eval_rollout(args, rollout_id))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/slime/slime/utils/async_utils.py", line 36, in run
return get_async_loop().run(coro)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/slime/slime/utils/async_utils.py", line 20, in run
return asyncio.run_coroutine_threadsafe(coro, self.loop).result()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/concurrent/futures/_base.py", line 456, in result
return self.__get_result()
^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result
raise self._exception
File "/root/slime/slime/rollout/sglang_rollout.py", line 460, in eval_rollout
results_list = await asyncio.gather(*coros)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/slime/slime/rollout/sglang_rollout.py", line 539, in eval_rollout_single_dataset
sample = await coro
^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/tasks.py", line 631, in _wait_for_one
return f.result() # May raise f.exception().
^^^^^^^^^^
File "/root/slime/slime/rollout/sglang_rollout.py", line 258, in generate_and_rm
sample.reward = await async_rm(args, sample)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/slime/slime/rollout/rm_hub/__init__.py", line 33, in async_rm
return await rm_function(args, sample, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/slime/examples/retool/generate_with_retool.py", line 360, in reward_func
solution_str = sample.prompt + sample.response
~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~
TypeError: can only concatenate list (not "str") to list
Status message: Job entrypoint command failed with exit code 1, last available logs (truncated to 20,000 chars):
File "/root/slime/slime/rollout/rm_hub/__init__.py", line 33, in async_rm
return await rm_function(args, sample, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/slime/examples/retool/generate_with_retool.py", line 360, in reward_func
solution_str = sample.prompt + sample.response
~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~
TypeError: can only concatenate list (not "str") to list
#!/bin/bash
# Launch script: cleans up any previous run, starts a local ray head node and
# submits train.py as a ray job with the argument groups defined below.

# Kill leftovers from a previous run so the task can be rerun.
# This deliberately happens BEFORE `set -e`: pkill exits non-zero when no
# process matches, which must not abort the script.
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
pkill -9 ray
pkill -9 python

set -ex

# Prevent Python (and therefore ray) from buffering stdout/stderr.
# CPython reads PYTHONUNBUFFERED; the previously exported PYTHONBUFFERED is
# not a variable the interpreter recognizes. It is still exported so the
# environment handed to child processes remains a superset of the original.
export PYTHONUNBUFFERED=1
export PYTHONBUFFERED=16

# Count NVLink references in the GPU topology matrix; stderr is discarded so a
# missing/failing nvidia-smi simply yields a count of 0.
NVLINK_COUNT=$(nvidia-smi topo -m 2>/dev/null | grep -o 'NV[0-9][0-9]*' | wc -l)
if [ "$NVLINK_COUNT" -gt 0 ]; then
    HAS_NVLINK=1
else
    HAS_NVLINK=0
fi
echo "HAS_NVLINK: $HAS_NVLINK (detected $NVLINK_COUNT NVLink references)"

# Directory containing this script; added to PYTHONPATH below.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

# Expected to define MODEL_ARGS, which is passed to train.py at the bottom.
source "/root/slime/scripts/models/qwen3-4B.sh"

CKPT_ARGS=(
    --hf-checkpoint ***/huggingface_modelscope/qwen3-4b-sft
    --ref-load ***/huggingface_modelscope/qwen3-4b-sft_torch_dist/release
    --save ***/huggingface_modelscope/qwen3-4b-sft-multi-turn/
    --save-interval 20
    --rotary-base 5000000
    --megatron-to-hf-mode bridge
)

ROLLOUT_ARGS=(
    --prompt-data ***/huggingface_modelscope/zhuzilin/dapo-math-17k/dapo-math-17k.jsonl
    --input-key prompt
    --label-key label
    --apply-chat-template
    --rollout-shuffle
    --reward-key score
    --num-rollout 3000
    --rollout-batch-size 32
    --n-samples-per-prompt 8
    --rollout-max-response-len 8192
    --rollout-temperature 0.8
    --global-batch-size 256
    --balance-data
)

EVAL_ARGS=(
    --eval-interval 20
    --eval-prompt-data aime ***/huggingface_modelscope/zhuzilin/aime-2024/aime-2024.jsonl
    --n-samples-per-eval-prompt 16
    --eval-max-response-len 16384
    --eval-top-p 0.7
)

PERF_ARGS=(
    --tensor-model-parallel-size 2
    --sequence-parallel
    --pipeline-model-parallel-size 1
    --context-parallel-size 1
    --expert-model-parallel-size 1
    --expert-tensor-parallel-size 1
    --recompute-granularity full
    --recompute-method uniform
    --recompute-num-layers 1
    --micro-batch-size 1
    # --use-dynamic-batch-size
    --max-tokens-per-gpu 9216
)

GRPO_ARGS=(
    --advantage-estimator grpo
    --use-kl-loss
    --kl-loss-coef 0.00
    --kl-loss-type low_var_kl
    --entropy-coef 0.00
    --eps-clip 0.2
    --eps-clip-high 0.28
)

OPTIMIZER_ARGS=(
    --optimizer adam
    --lr 1e-6
    --lr-decay-style constant
    --weight-decay 0.1
    --adam-beta1 0.9
    --adam-beta2 0.98
)

WANDB_ARGS=(
    --use-tensorboard
    # --use-wandb
    # --wandb-project slime-dapo
    # --wandb-group qwen3-4B-test-multi-turn
    # --wandb-key ${WANDB_KEY}
)

SGLANG_ARGS=(
    --rollout-num-gpus-per-engine 2
    --sglang-mem-fraction-static 0.7
)

MISC_ARGS=(
    # default dropout in megatron is 0.1
    --attention-dropout 0.0
    --hidden-dropout 0.0
    # should be good for model performance
    --accumulate-allreduce-grads-in-fp32
    --attention-softmax-in-fp32
    # need to comment this when using model with MLA
    --attention-backend flash
)

CUSTOM_ARGS=(
    --custom-generate-function-path generate_with_retool.generate
    --custom-rm-path generate_with_retool.reward_func
)

# launch the master node of ray in container
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus 8 --disable-usage-stats --dashboard-host=0.0.0.0 --dashboard-port=8265

# Build the runtime environment JSON with proper variable substitution
RUNTIME_ENV_JSON="{
  \"env_vars\": {
    \"PYTHONPATH\": \"/root/Megatron-LM/:${SCRIPT_DIR}:/root/slime\",
    \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\",
    \"TENSORBOARD_DIR\": \"***/huggingface_modelscope/tensorboard_local\",
    \"NCCL_NVLS_ENABLE\": \"${HAS_NVLINK}\"
  }
}"

# Every array is expanded as "${ARR[@]}" so each element reaches train.py as
# exactly one argument, with no second round of word splitting or globbing.
ray job submit --address="http://127.0.0.1:8265" \
    --runtime-env-json="${RUNTIME_ENV_JSON}" \
    -- python3 train.py \
    --actor-num-nodes 1 \
    --actor-num-gpus-per-node 8 \
    --colocate \
    "${MODEL_ARGS[@]}" \
    "${CKPT_ARGS[@]}" \
    "${ROLLOUT_ARGS[@]}" \
    "${OPTIMIZER_ARGS[@]}" \
    "${GRPO_ARGS[@]}" \
    "${WANDB_ARGS[@]}" \
    "${PERF_ARGS[@]}" \
    "${EVAL_ARGS[@]}" \
    "${SGLANG_ARGS[@]}" \
    "${MISC_ARGS[@]}" \
    "${CUSTOM_ARGS[@]}"
docker
slimerl/slime:latest
cd /root/slime
bash examples/retool/retool_qwen3_4b_rl.sh
Bug Description
/root/slime/examples/retool
bash examples/retool/retool_qwen3_4b_rl.sh
Steps to Reproduce
Expected Behavior
Actual Behavior
Environment
Logs
Additional Context
No response
Pre-submission Checklist