google · wang2yn84 · Jan 10, 2026
@@ -1,4 +1,4 @@
-reference_model_config:
+model_config:
   model_name: "gemma2-2b-it"
   model_id: "google/gemma-2/flax/gemma2-2b-it"
   model_source: "kaggle"

@@ -19,13 +19,13 @@ batch_size=${batch_size:-1}
 num_batches=${num_batches:-3738}
 num_train_epochs=${num_train_epochs:-1}
 warmup_ratio=${warmup_ratio:-0.1}
-train_fraction=${train_fraction:-1.0} 
+train_fraction=${train_fraction:-1.0}
 
 echo "Using parameters:"
 echo "  Batch Size: $batch_size"
 echo "  Num Batches: $num_batches"
 echo "  Num Epochs: $num_train_epochs"
-echo "  Warmup Ratio: $warmup_ratio" 
+echo "  Warmup Ratio: $warmup_ratio"
 echo "  Train Fraction: $train_fraction"
 
 max_steps_float=$(awk "BEGIN {print $batch_size * $num_batches * $num_train_epochs * $train_fraction}")
@@ -41,8 +41,8 @@ echo "Rounded warmup steps: $warmup_steps"
 python3 -m tunix.cli.grpo_main \
   tunix/cli/base_config.yaml \
   override_config_file=examples/rl/grpo/gsm8k/configs/gemma2_2b.yaml \
-  reference_model_config.model_download_path="/tmp/models/gemma2-2b" \
-  reference_model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/1" \
+  model_config.model_download_path="/tmp/models/gemma2-2b" \
+  model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/1" \
   tokenizer_config.tokenizer_path="/tmp/models/gemma2-2b/models/google/gemma-2/flax/gemma2-2b-it/1/tokenizer.model" \
   batch_size=$batch_size \
   num_batches=$num_batches \

@@ -19,13 +19,13 @@ batch_size=${batch_size:-1}
 num_batches=${num_batches:-3738}
 num_train_epochs=${num_train_epochs:-1}
 warmup_ratio=${warmup_ratio:-0.1}
-train_fraction=${train_fraction:-1.0} 
+train_fraction=${train_fraction:-1.0}
 
 echo "Using parameters:"
 echo "  Batch Size: $batch_size"
 echo "  Num Batches: $num_batches"
 echo "  Num Epochs: $num_train_epochs"
-echo "  Warmup Ratio: $warmup_ratio" 
+echo "  Warmup Ratio: $warmup_ratio"
 echo "  Train Fraction: $train_fraction"
 
 max_steps_float=$(awk "BEGIN {print $batch_size * $num_batches * $num_train_epochs * $train_fraction}")
@@ -40,14 +40,14 @@ echo "Rounded warmup steps: $warmup_steps"
 
 python3 -m tunix.cli.grpo_main \
   base_config.yaml \
-  reference_model_config.model_name="gemma-3-12b-it" \
-  reference_model_config.model_id="google/gemma-3-12b-it" \
-  reference_model_config.model_path="gs://gemma-data/checkpoints/gemma3-12b-it" \
-  reference_model_config.model_source="gcs" \
-  reference_model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/gemma3_12b" \
-  reference_model_config.mesh.shape="(2,4)" \
-  reference_model_config.mesh.axis_names="('fsdp','tp')" \
-  reference_model_config.rng_seed=42 \
+  model_config.model_name="gemma-3-12b-it" \
+  model_config.model_id="google/gemma-3-12b-it" \
+  model_config.model_path="gs://gemma-data/checkpoints/gemma3-12b-it" \
+  model_config.model_source="gcs" \
+  model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/gemma3_12b" \
+  model_config.mesh.shape="(2,4)" \
+  model_config.mesh.axis_names="('fsdp','tp')" \
+  model_config.rng_seed=42 \
   actor_model_config.lora_config.rank=64 \
   actor_model_config.lora_config.alpha=64.0 \
   actor_model_config.lora_config.module_path=".*q_einsum|.*kv_einsum|.*gate_proj|.*down_proj|.*up_proj|.*attn_vec_einsum" \

@@ -19,13 +19,13 @@ batch_size=${batch_size:-1}
 num_batches=${num_batches:-3738}
 num_train_epochs=${num_train_epochs:-1}
 warmup_ratio=${warmup_ratio:-0.1}
-train_fraction=${train_fraction:-1.0} 
+train_fraction=${train_fraction:-1.0}
 
 echo "Using parameters:"
 echo "  Batch Size: $batch_size"
 echo "  Num Batches: $num_batches"
 echo "  Num Epochs: $num_train_epochs"
-echo "  Warmup Ratio: $warmup_ratio" 
+echo "  Warmup Ratio: $warmup_ratio"
 echo "  Train Fraction: $train_fraction"
 
 max_steps_float=$(awk "BEGIN {print $batch_size * $num_batches * $num_train_epochs * $train_fraction}")
@@ -40,14 +40,14 @@ echo "Rounded warmup steps: $warmup_steps"
 
 python3 -m tunix.cli.grpo_main \
   base_config.yaml \
-  reference_model_config.model_name="gemma-3-1b-it" \
-  reference_model_config.model_id="google/gemma-3-1b-it" \
-  reference_model_config.model_path="gs://gemma-data/checkpoints/gemma3-1b-it" \
-  reference_model_config.model_source="gcs" \
-  reference_model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/gemma3_1b" \
-  reference_model_config.mesh.shape="(2,4)" \
-  reference_model_config.mesh.axis_names="('fsdp','tp')" \
-  reference_model_config.rng_seed=42 \
+  model_config.model_name="gemma-3-1b-it" \
+  model_config.model_id="google/gemma-3-1b-it" \
+  model_config.model_path="gs://gemma-data/checkpoints/gemma3-1b-it" \
+  model_config.model_source="gcs" \
+  model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/gemma3_1b" \
+  model_config.mesh.shape="(2,4)" \
+  model_config.mesh.axis_names="('fsdp','tp')" \
+  model_config.rng_seed=42 \
   actor_model_config.lora_config.rank=64 \
   actor_model_config.lora_config.alpha=64.0 \
   actor_model_config.lora_config.module_path=".*q_einsum|.*kv_einsum|.*gate_proj|.*down_proj|.*up_proj|.*attn_vec_einsum" \

@@ -19,13 +19,13 @@ batch_size=${batch_size:-1}
 num_batches=${num_batches:-3738}
 num_train_epochs=${num_train_epochs:-1}
 warmup_ratio=${warmup_ratio:-0.1}
-train_fraction=${train_fraction:-1.0} 
+train_fraction=${train_fraction:-1.0}
 
 echo "Using parameters:"
 echo "  Batch Size: $batch_size"
 echo "  Num Batches: $num_batches"
 echo "  Num Epochs: $num_train_epochs"
-echo "  Warmup Ratio: $warmup_ratio" 
+echo "  Warmup Ratio: $warmup_ratio"
 echo "  Train Fraction: $train_fraction"
 
 max_steps_float=$(awk "BEGIN {print $batch_size * $num_batches * $num_train_epochs * $train_fraction}")
@@ -40,14 +40,14 @@ echo "Rounded warmup steps: $warmup_steps"
 
 python3 -m tunix.cli.grpo_main \
   base_config.yaml \
-  reference_model_config.model_name="gemma-3-4b-it" \
-  reference_model_config.model_id="google/gemma-3-4b-it" \
-  reference_model_config.model_path="gs://gemma-data/checkpoints/gemma3-4b-it" \
-  reference_model_config.model_source="gcs" \
-  reference_model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/gemma3_4b" \
-  reference_model_config.mesh.shape="(2,4)" \
-  reference_model_config.mesh.axis_names="('fsdp','tp')" \
-  reference_model_config.rng_seed=42 \
+  model_config.model_name="gemma-3-4b-it" \
+  model_config.model_id="google/gemma-3-4b-it" \
+  model_config.model_path="gs://gemma-data/checkpoints/gemma3-4b-it" \
+  model_config.model_source="gcs" \
+  model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/gemma3_4b" \
+  model_config.mesh.shape="(2,4)" \
+  model_config.mesh.axis_names="('fsdp','tp')" \
+  model_config.rng_seed=42 \
   actor_model_config.lora_config.rank=64 \
   actor_model_config.lora_config.alpha=64.0 \
   actor_model_config.lora_config.module_path=".*q_einsum|.*kv_einsum|.*gate_proj|.*down_proj|.*up_proj|.*attn_vec_einsum" \

@@ -19,13 +19,13 @@ batch_size=${batch_size:-8}
 num_batches=${num_batches:-3738}
 num_train_epochs=${num_train_epochs:-1}
 warmup_ratio=${warmup_ratio:-0.1}
-train_fraction=${train_fraction:-1.0} 
+train_fraction=${train_fraction:-1.0}
 
 echo "Using parameters:"
 echo "  Batch Size: $batch_size"
 echo "  Num Batches: $num_batches"
 echo "  Num Epochs: $num_train_epochs"
-echo "  Warmup Ratio: $warmup_ratio" 
+echo "  Warmup Ratio: $warmup_ratio"
 echo "  Train Fraction: $train_fraction"
 
 
@@ -41,14 +41,14 @@ echo "Rounded warmup steps: $warmup_steps"
 
 python3 -m tunix.cli.grpo_main \
   base_config.yaml \
-  reference_model_config.model_name="gemma-7b-it" \
-  reference_model_config.model_id="google/gemma/flax/7b-it" \
-  reference_model_config.model_source="kaggle" \
-  reference_model_config.model_download_path="/tmp/models/gemma-7b" \
-  reference_model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/1" \
-  reference_model_config.mesh.shape="(2,4)" \
-  reference_model_config.mesh.axis_names="('fsdp','tp')" \
-  reference_model_config.rng_seed=42 \
+  model_config.model_name="gemma-7b-it" \
+  model_config.model_id="google/gemma/flax/7b-it" \
+  model_config.model_source="kaggle" \
+  model_config.model_download_path="/tmp/models/gemma-7b" \
+  model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/1" \
+  model_config.mesh.shape="(2,4)" \
+  model_config.mesh.axis_names="('fsdp','tp')" \
+  model_config.rng_seed=42 \
   actor_model_config.lora_config.rank=64 \
   actor_model_config.lora_config.alpha=64.0 \
   actor_model_config.lora_config.module_path=".*q_einsum|.*kv_einsum|.*gate_proj|.*down_proj|.*up_proj|.*attn_vec_einsum" \
@@ -94,4 +94,4 @@ python3 -m tunix.cli.grpo_main \
   grpo_config.num_iterations=1 \
   grpo_config.beta=0.08 \
   grpo_config.epsilon=0.2 \
-  reward_functions="['tunix/cli/reward_fn/gsm8k.py']"
+  reward_functions="['tunix/cli/reward_fn/gsm8k.py']"
@@ -19,13 +19,13 @@ batch_size=${batch_size:-8}
 num_batches=${num_batches:-3738}
 num_train_epochs=${num_train_epochs:-1}
 warmup_ratio=${warmup_ratio:-0.1}
-train_fraction=${train_fraction:-1.0} 
+train_fraction=${train_fraction:-1.0}
 
 echo "Using parameters:"
 echo "  Batch Size: $batch_size"
 echo "  Num Batches: $num_batches"
 echo "  Num Epochs: $num_train_epochs"
-echo "  Warmup Ratio: $warmup_ratio" 
+echo "  Warmup Ratio: $warmup_ratio"
 echo "  Train Fraction: $train_fraction"
 
 max_steps_float=$(awk "BEGIN {print $batch_size * $num_batches * $num_train_epochs * $train_fraction}")
@@ -40,13 +40,13 @@ echo "Rounded warmup steps: $warmup_steps"
 
 python3 -m tunix.cli.grpo_main \
   base_config.yaml \
-  reference_model_config.model_name="llama-3.1-8b" \
-  reference_model_config.model_id="meta-llama/Llama-3.1-8B" \
-  reference_model_config.model_source="huggingface" \
-  reference_model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/1" \
-  reference_model_config.mesh.shape="(2,4)" \
-  reference_model_config.mesh.axis_names="('fsdp','tp')" \
-  reference_model_config.rng_seed=42 \
+  model_config.model_name="llama-3.1-8b" \
+  model_config.model_id="meta-llama/Llama-3.1-8B" \
+  model_config.model_source="huggingface" \
+  model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/1" \
+  model_config.mesh.shape="(2,4)" \
+  model_config.mesh.axis_names="('fsdp','tp')" \
+  model_config.rng_seed=42 \
   actor_model_config.lora_config.rank=64 \
   actor_model_config.lora_config.alpha=64.0 \
   actor_model_config.lora_config.module_path=".*q_einsum|.*kv_einsum|.*gate_proj|.*down_proj|.*up_proj|.*attn_vec_einsum" \

@@ -19,13 +19,13 @@ batch_size=${batch_size:-1}
 num_batches=${num_batches:-3738}
 num_train_epochs=${num_train_epochs:-1}
 warmup_ratio=${warmup_ratio:-0.1}
-train_fraction=${train_fraction:-1.0} 
+train_fraction=${train_fraction:-1.0}
 
 echo "Using parameters:"
 echo "  Batch Size: $batch_size"
 echo "  Num Batches: $num_batches"
 echo "  Num Epochs: $num_train_epochs"
-echo "  Warmup Ratio: $warmup_ratio" 
+echo "  Warmup Ratio: $warmup_ratio"
 echo "  Train Fraction: $train_fraction"
 
 max_steps_float=$(awk "BEGIN {print $batch_size * $num_batches * $num_train_epochs * $train_fraction}")
@@ -40,13 +40,13 @@ echo "Rounded warmup steps: $warmup_steps"
 
 python3 -m tunix.cli.grpo_main \
   base_config.yaml \
-  reference_model_config.model_name="llama3.2-1b" \
-  reference_model_config.model_id="meta-llama/Llama-3.2-1B" \
-  reference_model_config.model_source="huggingface" \
-  reference_model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/1" \
-  reference_model_config.mesh.shape="(2,4)" \
-  reference_model_config.mesh.axis_names="('fsdp','tp')" \
-  reference_model_config.rng_seed=42 \
+  model_config.model_name="llama3.2-1b" \
+  model_config.model_id="meta-llama/Llama-3.2-1B" \
+  model_config.model_source="huggingface" \
+  model_config.intermediate_ckpt_dir="/tmp/intermediate_ckpt/1" \
+  model_config.mesh.shape="(2,4)" \
+  model_config.mesh.axis_names="('fsdp','tp')" \
+  model_config.rng_seed=42 \
   actor_model_config.lora_config.rank=64 \
   actor_model_config.lora_config.alpha=64.0 \
   actor_model_config.lora_config.module_path=".*q_einsum|.*kv_einsum|.*gate_proj|.*down_proj|.*up_proj|.*attn_vec_einsum" \
@@ -92,4 +92,4 @@ python3 -m tunix.cli.grpo_main \
   grpo_config.num_iterations=1 \
   grpo_config.beta=0.08 \
   grpo_config.epsilon=0.2 \
-  reward_functions="['tunix/cli/reward_fn/gsm8k.py']"
+  reward_functions="['tunix/cli/reward_fn/gsm8k.py']"
@@ -19,13 +19,13 @@ batch_size=${batch_size:-12}
 num_batches=${num_batches:-8192}
 num_train_epochs=${num_train_epochs:-1}
 warmup_ratio=${warmup_ratio:-0.1}
-train_fraction=${train_fraction:-1.0} 
+train_fraction=${train_fraction:-1.0}
 
 echo "Using parameters:"
 echo "  Batch Size: $batch_size"
 echo "  Num Batches: $num_batches"
 echo "  Num Epochs: $num_train_epochs"
-echo "  Warmup Ratio: $warmup_ratio" 
+echo "  Warmup Ratio: $warmup_ratio"
 echo "  Train Fraction: $train_fraction"
 
 max_steps_float=$(awk "BEGIN {print $batch_size * $num_batches * $num_train_epochs * $train_fraction}")
@@ -40,13 +40,13 @@ echo "Rounded warmup steps: $warmup_steps"
 
 python3 -m tunix.cli.grpo_main \
   base_config.yaml \
-  reference_model_config.model_name="llama3.2-1b" \
-  reference_model_config.model_id="meta-llama/Llama-3.2-1B-Instruct" \
-  reference_model_config.model_source="huggingface" \
-  reference_model_config.intermediate_ckpt_dir="/tmp/tunix/experiments/grpo/llama3p2_1b_gsm8k" \
-  reference_model_config.mesh.shape="(4,1)" \
-  reference_model_config.mesh.axis_names="('fsdp','tp')" \
-  reference_model_config.rng_seed=42 \
+  model_config.model_name="llama3.2-1b" \
+  model_config.model_id="meta-llama/Llama-3.2-1B-Instruct" \
+  model_config.model_source="huggingface" \
+  model_config.intermediate_ckpt_dir="/tmp/tunix/experiments/grpo/llama3p2_1b_gsm8k" \
+  model_config.mesh.shape="(4,1)" \
+  model_config.mesh.axis_names="('fsdp','tp')" \
+  model_config.rng_seed=42 \
   actor_model_config.mesh.shape="(4,1)" \
   actor_model_config.mesh.axis_names="('fsdp','tp')" \
   actor_model_config.lora_config={} \
@@ -92,4 +92,4 @@ python3 -m tunix.cli.grpo_main \
   grpo_config.beta=0.08 \
   grpo_config.epsilon=0.2 \
   reward_functions="['tunix/cli/reward_fn/gsm8k_verl.py']" \
-  verl_compatible=true
+  verl_compatible=true