google
diff --git a/tests/rl/algorithm_config_test.py b/tests/rl/algorithm_config_test.py
Lines changed: 31 additions & 6 deletions
diff --git a/tests/rl/function_registry_test.py b/tests/rl/function_registry_test.py
Lines changed: 1 addition & 1 deletion
diff --git a/tests/rl/grpo/dapo_learner_test.py b/tests/rl/grpo/dapo_learner_test.py
Lines changed: 192 additions & 5 deletions
@@ -16,7 +16,6 @@
 from absl.testing import parameterized
 from tunix.rl import algorithm_config
 
-
 class AlgorithmConfigTest(parameterized.TestCase):
 
   def test_defaults_are_valid(self):
@@ -30,12 +29,19 @@ def test_defaults_are_valid(self):
       self.fail(f"Default AlgorithmConfig values raised ValueError: {e}")
 
   @parameterized.named_parameters(
-      dict(testcase_name="gspo_gae_ppo", algo="gspo", adv="gae", loss="ppo"),
+      dict(
+          testcase_name="gspo_gae_ppo", algo="gspo-token", adv="gae", loss="ppo"
+      ),
       dict(
           testcase_name="grpo_grpo_grpo", algo="grpo", adv="grpo", loss="grpo"
       ),
       dict(testcase_name="ppo_gae_ppo", algo="ppo", adv="gae", loss="ppo"),
-      dict(testcase_name="gspo_grpo_ppo", algo="gspo", adv="grpo", loss="ppo"),
+      dict(
+          testcase_name="gspo_grpo_ppo",
+          algo="gspo-token",
+          adv="grpo",
+          loss="ppo",
+      ),
   )
   def test_valid_combinations(self, algo: str, adv: str, loss: str):
     """Tests various valid combinations of core algorithm parameters."""
@@ -54,7 +60,6 @@ def test_valid_combinations(self, algo: str, adv: str, loss: str):
       )
 
   @parameterized.named_parameters(
-      dict(testcase_name="invalid_algo_dapo", value="dapo"),
       dict(testcase_name="invalid_algo_else", value="something_else"),
   )
   def test_invalid_algo_variant(self, value: str):
@@ -91,12 +96,14 @@ def test_kw_only_enforcement(self):
     """Ensures that positional arguments are not allowed."""
     with self.assertRaises(TypeError):
       # Attempt to initialize with positional arguments
-      algorithm_config.AlgorithmConfig("grpo", "grpo", "grpo")
+      algorithm_config.AlgorithmConfig("grpo-token", "grpo", "grpo")
 
     # Check that standard keyword initialization works
     try:
       algorithm_config.AlgorithmConfig(
-          algo_variant="gspo", advantage_estimator="gae", policy_loss_fn="ppo"
+          algo_variant="gspo-token",
+          advantage_estimator="gae",
+          policy_loss_fn="ppo",
       )
     except TypeError:
       self.fail("Keyword arguments failed for kw_only dataclass")
@@ -117,6 +124,24 @@ def test_field_assignment(self):
     config.algo_variant = "invalid_after_init"
     self.assertEqual(config.algo_variant, "invalid_after_init")
 
+  def test_config_logging(self):
+    """Tests that configuration is logged correctly upon initialization."""
+    # assertLogs catches logs at the specified level or higher
+    with self.assertLogs(level="INFO") as log:
+      algorithm_config.AlgorithmConfig(
+          algo_variant="gspo-token",
+          advantage_estimator="gae",
+          policy_loss_fn="ppo",
+      )
+
+    # log.output is a list of strings like ['INFO:root:message...']
+    full_log_output = "\n".join(log.output)
+
+    self.assertIn("Initializing AlgorithmConfig", full_log_output)
+    # Assert on the full variant string passed above: the shorter
+    # "algo_variant: gspo" is only a prefix of it and would also match a
+    # truncated or differently-suffixed value.
+    self.assertIn("algo_variant: gspo-token", full_log_output)
+    self.assertIn("advantage_estimator: gae", full_log_output)
+    self.assertIn("policy_loss_fn: ppo", full_log_output)
+
 
 if __name__ == "__main__":
   absltest.main()
@@ -52,7 +52,7 @@ def test_custom_categories_instance(self):
   def test_empty_categories_instance(self):
     # Test-specific instance for empty categories
     registry = function_registry.FunctionRegistry(allowed_categories=[])
-    self.assertLen(registry.list_categories(), 2)
+    # NOTE(review): an empty allowed_categories list is still expected to
+    # yield 3 categories -- presumably the registry's defaults; confirm.
+    self.assertLen(registry.list_categories(), 3)
 
   @parameterized.named_parameters(
       dict(
 
@@ -101,16 +101,203 @@ def test_diff_loss(self):
         grpo_loss.item(),
         msg=(
             "DAPO and GRPO loss values should be different for the same input"
-            " due to different configurations and potentially different"
-            " logic."
+            " due to different loss aggregation logics."
         ),
     )
 
     self.assertIn("kl", dapo_aux)
     self.assertIn("kl", grpo_aux)
-    self.assertNotEqual(
-        dapo_aux["kl"], grpo_aux["kl"]
-    )  # Expected as beta differs
+    self.assertEqual(dapo_aux["kl"], 0.0)  # DAPO does not have KL term.
+
+
+class TestDAPOConfigPostInit(parameterized.TestCase):
+  """Tests the validation performed when constructing `dapo_lib.DAPOConfig`.
+
+  Valid argument sets must construct without `ValueError`; invalid ones must
+  raise `ValueError` with a message matching the expected regex.
+  """
+
+  def test_valid_default(self):
+    """Tests that default values pass validation."""
+    try:
+      dapo_lib.DAPOConfig()
+    except ValueError as e:
+      self.fail(f"DAPOConfig raised ValueError on default initialization: {e}")
+
+  @parameterized.named_parameters(
+      dict(testcase_name="custom_epsilons", epsilon=0.1, epsilon_high=0.15),
+      # Equal bounds are accepted: only epsilon_high < epsilon is rejected
+      # (see "invalid_epsilon_high" in test_invalid_configurations).
+      dict(testcase_name="epsilons_equal", epsilon=0.1, epsilon_high=0.1),
+      # A disabled or absent buffer needs none of the overlong_* keys.
+      dict(
+          testcase_name="buffer_disabled",
+          overlong_buffer={"enable": False},
+      ),
+      dict(testcase_name="buffer_none", overlong_buffer=None),
+      dict(
+          testcase_name="valid_buffer",
+          overlong_buffer={
+              "enable": True,
+              "overlong_buffer_length": 2000,
+              "overlong_buffer_penalty": 0.5,
+              "max_response_length": 10000,
+          },
+      ),
+  )
+  def test_valid_configurations(self, **kwargs):
+    """Tests various valid custom configurations.
+
+    Args:
+      **kwargs: Keyword arguments forwarded unchanged to
+        `dapo_lib.DAPOConfig`.
+    """
+    try:
+      dapo_lib.DAPOConfig(**kwargs)
+    except ValueError as e:
+      self.fail(f"DAPOConfig raised ValueError for valid case {kwargs}: {e}")
+
+  @parameterized.named_parameters(
+      dict(
+          testcase_name="invalid_epsilon_high",
+          config_kwargs=dict(epsilon=0.2, epsilon_high=0.1),
+          expected_regex=(
+              "epsilon_high must be greater than or equal to epsilon."
+          ),
+      ),
+      # The next three cases each omit exactly one required key of an
+      # enabled buffer.
+      dict(
+          testcase_name="buffer_missing_length",
+          config_kwargs=dict(
+              overlong_buffer={
+                  "enable": True,
+                  "overlong_buffer_penalty": 1.0,
+                  "max_response_length": 20480,
+              }
+          ),
+          expected_regex=(
+              "overlong_buffer is enabled but missing.*overlong_buffer_length.*"
+          ),
+      ),
+      dict(
+          testcase_name="buffer_missing_penalty",
+          config_kwargs=dict(
+              overlong_buffer={
+                  "enable": True,
+                  "overlong_buffer_length": 4096,
+                  "max_response_length": 20480,
+              }
+          ),
+          expected_regex=(
+              "overlong_buffer is enabled but missing"
+              ".*overlong_buffer_penalty.*"
+          ),
+      ),
+      dict(
+          testcase_name="buffer_missing_max_length",
+          config_kwargs=dict(
+              overlong_buffer={
+                  "enable": True,
+                  "overlong_buffer_length": 4096,
+                  "overlong_buffer_penalty": 1.0,
+              }
+          ),
+          expected_regex=(
+              "overlong_buffer is enabled but missing.*max_response_length.*"
+          ),
+      ),
+      # An explicit None is expected to be reported like a missing key.
+      dict(
+          testcase_name="buffer_length_is_none",
+          config_kwargs=dict(
+              overlong_buffer={
+                  "enable": True,
+                  "overlong_buffer_length": None,
+                  "overlong_buffer_penalty": 1.0,
+                  "max_response_length": 20480,
+              }
+          ),
+          expected_regex=(
+              "overlong_buffer is enabled but missing.*overlong_buffer_length.*"
+          ),
+      ),
+      dict(
+          testcase_name="negative_penalty",
+          config_kwargs=dict(
+              overlong_buffer={
+                  "enable": True,
+                  "overlong_buffer_length": 4096,
+                  "overlong_buffer_penalty": -0.5,
+                  "max_response_length": 20480,
+              }
+          ),
+          expected_regex="overlong_buffer_penalty must be non-negative",
+      ),
+  )
+  def test_invalid_configurations(self, config_kwargs, expected_regex):
+    """Tests various invalid configurations that should raise ValueError.
+
+    Args:
+      config_kwargs: Keyword arguments forwarded to `dapo_lib.DAPOConfig`.
+      expected_regex: Regex the raised `ValueError` message must match.
+    """
+    with self.assertRaisesRegex(ValueError, expected_regex):
+      dapo_lib.DAPOConfig(**config_kwargs)
+
+
+class RewardShapingTest(parameterized.TestCase):
+  """Tests `dapo_lib.reward_shaping` with overlong-buffer settings."""
+
+  def setUp(self):
+    super().setUp()
+    # Stand-in whose `.Mode` attribute is passed as the `mode` argument.
+    self.mock_cluster = mock.MagicMock()
+
+  def test_raises_error_on_none_buffer(self):
+    """Tests that a `None` overlong_buffer raises `ValueError`."""
+    with self.assertRaisesRegex(
+        ValueError, "reward_shaping is called but with empty overlong_buffer."
+    ):
+
+      dapo_lib.reward_shaping(
+          prompts=["test prompt"],
+          completions=["test completion"],
+          mode=self.mock_cluster.Mode,
+          overlong_buffer=None,
+      )
+
+  @parameterized.named_parameters(
+      dict(
+          testcase_name="under_length",
+          lengths=[70],
+          expected_scores=[0.0],
+      ),
+      dict(
+          testcase_name="at_expected_length",
+          lengths=[80],
+          expected_scores=[0.0],
+      ),
+      dict(
+          testcase_name="in_buffer_zone",
+          lengths=[90],
+          expected_scores=[-5.0],
+      ),
+      dict(
+          testcase_name="at_max_length",
+          lengths=[100],
+          expected_scores=[-10.0],
+      ),
+      dict(
+          testcase_name="over_max_length",
+          lengths=[110],
+          expected_scores=[-15.0],
+      ),
+      dict(
+          testcase_name="mixed_lengths",
+          lengths=[70, 80, 90, 100, 110],
+          expected_scores=[0.0, 0.0, -5.0, -10.0, -15.0],
+      ),
+      dict(
+          testcase_name="zero_penalty",
+          lengths=[110],
+          expected_scores=[0.0],
+          penalty=0,
+      ),
+  )
+  def test_reward_scores(self, lengths, expected_scores, penalty=10):
+    """Tests the score expected for each completion length.
+
+    With a buffer length of 20 and a max response length of 100, the cases
+    expect a score of 0 for lengths up to 80 and, beyond that,
+    `-(length - 80) / 20 * penalty`. The expected values keep decreasing
+    past the max length (110 -> -15 at the default penalty of 10).
+
+    Args:
+      lengths: Character lengths of the completions to generate.
+      expected_scores: Expected score per completion, in the same order.
+      penalty: Value used for the "overlong_buffer_penalty" setting.
+    """
+    # Each completion is a string of "a" repeated `length` times.
+    completions = ["a" * length for length in lengths]
+    overlong_buffer = {
+        "overlong_buffer_length": 20,
+        "overlong_buffer_penalty": penalty,
+        "max_response_length": 100,
+    }
+    # expected_response_length = 100 - 20 = 80
+
+    scores = dapo_lib.reward_shaping(
+        prompts=[""] * len(completions),
+        completions=completions,
+        mode=self.mock_cluster.Mode,
+        overlong_buffer=overlong_buffer,
+    )
+
+    self.assertSequenceAlmostEqual(expected_scores, scores, places=4)
 
 
 if __name__ == "__main__":