diff --git a/src/mjlab/envs/manager_based_rl_env.py b/src/mjlab/envs/manager_based_rl_env.py
index 11563c97c..7f7eea09a 100644
--- a/src/mjlab/envs/manager_based_rl_env.py
+++ b/src/mjlab/envs/manager_based_rl_env.py
@@ -85,6 +85,13 @@ class ManagerBasedRlEnvCfg:
   receives a truncated done signal to bootstrap the value of continuing beyond
   the limit.
   """
+  scale_rewards_by_dt: bool = True
+  """Whether to multiply rewards by the environment step duration (dt).
+
+  When True (default), reward values are scaled by step_dt to normalize cumulative
+  episodic rewards across different simulation frequencies. Set to False for
+  algorithms that expect unscaled reward signals (e.g., HER, static reward scaling).
+  """
 
 
 class ManagerBasedRlEnv:
@@ -236,7 +243,9 @@ def load_managers(self) -> None:
     self.termination_manager = TerminationManager(self.cfg.terminations, self)
     print_info(f"[INFO] {self.termination_manager}")
 
-    self.reward_manager = RewardManager(self.cfg.rewards, self)
+    self.reward_manager = RewardManager(
+      self.cfg.rewards, self, scale_by_dt=self.cfg.scale_rewards_by_dt
+    )
     print_info(f"[INFO] {self.reward_manager}")
     if self.cfg.curriculum is not None:
       self.curriculum_manager = CurriculumManager(self.cfg.curriculum, self)
diff --git a/src/mjlab/managers/reward_manager.py b/src/mjlab/managers/reward_manager.py
index 10adfcd97..51cf8dfd0 100644
--- a/src/mjlab/managers/reward_manager.py
+++ b/src/mjlab/managers/reward_manager.py
@@ -16,12 +16,39 @@
 
 
 class RewardManager(ManagerBase):
+  """Manages reward computation by aggregating weighted reward terms.
+
+  Reward Scaling Behavior:
+    By default, rewards are scaled by the environment step duration (dt). This
+    normalizes cumulative episodic rewards across different simulation frequencies.
+    The scaling can be disabled via the ``scale_by_dt`` parameter.
+
+    When ``scale_by_dt=True`` (default):
+      - ``reward_buf`` (returned by ``compute()``) = raw_value * weight * dt
+      - ``_episode_sums`` (cumulative rewards) are scaled by dt
+      - ``Episode_Reward/*`` logged metrics are scaled by dt
+
+    When ``scale_by_dt=False``:
+      - ``reward_buf`` = raw_value * weight (no dt scaling)
+
+    Regardless of the scaling setting:
+      - ``_step_reward`` (via ``get_active_iterable_terms()``) always contains
+        the unscaled reward rate (raw_value * weight)
+  """
+
   _env: ManagerBasedRlEnv
 
-  def __init__(self, cfg: dict[str, RewardTermCfg], env: ManagerBasedRlEnv):
+  def __init__(
+    self,
+    cfg: dict[str, RewardTermCfg],
+    env: ManagerBasedRlEnv,
+    *,
+    scale_by_dt: bool = True,
+  ):
     self._term_names: list[str] = list()
     self._term_cfgs: list[RewardTermCfg] = list()
     self._class_term_cfgs: list[RewardTermCfg] = list()
+    self._scale_by_dt = scale_by_dt
     self.cfg = deepcopy(cfg)
     super().__init__(env=env)
 
@@ -76,18 +103,19 @@ def reset(
 
   def compute(self, dt: float) -> torch.Tensor:
     self._reward_buf[:] = 0.0
+    scale = dt if self._scale_by_dt else 1.0
     for term_idx, (name, term_cfg) in enumerate(
       zip(self._term_names, self._term_cfgs, strict=False)
     ):
       if term_cfg.weight == 0.0:
         self._step_reward[:, term_idx] = 0.0
         continue
-      value = term_cfg.func(self._env, **term_cfg.params) * term_cfg.weight * dt
+      value = term_cfg.func(self._env, **term_cfg.params) * term_cfg.weight * scale
       # NaN/Inf can occur from corrupted physics state; zero them to avoid policy crash.
       value = torch.nan_to_num(value, nan=0.0, posinf=0.0, neginf=0.0)
       self._reward_buf += value
       self._episode_sums[name] += value
-      self._step_reward[:, term_idx] = value / dt
+      self._step_reward[:, term_idx] = value / scale
     return self._reward_buf
 
   def get_active_iterable_terms(self, env_idx):
diff --git a/tests/test_rewards.py b/tests/test_rewards.py
index e6394373f..b98ca5c02 100644
--- a/tests/test_rewards.py
+++ b/tests/test_rewards.py
@@ -271,3 +271,63 @@ def neginf_reward(env):
   assert not torch.isinf(rewards).any()
   assert rewards[2] == 0.0
+
+
+def test_reward_scaling_enabled(mock_env):
+  """Test that rewards are scaled by dt when scale_by_dt=True (default)."""
+
+  def constant_reward(env):
+    return torch.ones(env.num_envs, device=env.device)
+
+  cfg = {"term": RewardTermCfg(func=constant_reward, weight=2.0, params={})}
+  manager = RewardManager(cfg, mock_env, scale_by_dt=True)
+
+  dt = 0.02
+  rewards = manager.compute(dt=dt)
+
+  # With scaling: reward = raw_value (1.0) * weight (2.0) * dt (0.02) = 0.04
+  expected = 1.0 * 2.0 * dt
+  assert torch.allclose(rewards, torch.full((4,), expected))
+
+  # _step_reward should be unscaled (raw_value * weight)
+  step_reward = manager._step_reward[:, 0]
+  expected_step = 1.0 * 2.0
+  assert torch.allclose(step_reward, torch.full((4,), expected_step))
+
+
+def test_reward_scaling_disabled(mock_env):
+  """Test that rewards are not scaled by dt when scale_by_dt=False."""
+
+  def constant_reward(env):
+    return torch.ones(env.num_envs, device=env.device)
+
+  cfg = {"term": RewardTermCfg(func=constant_reward, weight=2.0, params={})}
+  manager = RewardManager(cfg, mock_env, scale_by_dt=False)
+
+  dt = 0.02
+  rewards = manager.compute(dt=dt)
+
+  # Without scaling: reward = raw_value (1.0) * weight (2.0) = 2.0
+  expected = 1.0 * 2.0
+  assert torch.allclose(rewards, torch.full((4,), expected))
+
+  # _step_reward should still be unscaled (same as reward when not scaling)
+  step_reward = manager._step_reward[:, 0]
+  assert torch.allclose(step_reward, torch.full((4,), expected))
+
+
+def test_reward_scaling_default_is_enabled(mock_env):
+  """Test that scale_by_dt defaults to True for backward compatibility."""
+
+  def constant_reward(env):
+    return torch.ones(env.num_envs, device=env.device)
+
+  cfg = {"term": RewardTermCfg(func=constant_reward, weight=1.0, params={})}
+  # Don't pass scale_by_dt - should default to True
+  manager = RewardManager(cfg, mock_env)
+
+  dt = 0.01
+  rewards = manager.compute(dt=dt)
+
+  # Default (scaling enabled): reward = 1.0 * 1.0 * 0.01 = 0.01
+  assert torch.allclose(rewards, torch.full((4,), 0.01))
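Note (outside the patch): a minimal standalone sketch of the scaling semantics introduced above, written in plain PyTorch rather than against mjlab so it runs on its own. The variable names are illustrative; only the raw_value * weight * dt relationship and the "_step_reward stores the unscaled rate" behavior come from the RewardManager.compute() changes in this diff.

  import torch

  num_envs, weight, dt = 4, 2.0, 0.02
  raw = torch.ones(num_envs)    # raw term value, one entry per env

  with_dt = raw * weight * dt   # scale_by_dt=True: contribution to reward_buf (0.04 per env)
  without_dt = raw * weight     # scale_by_dt=False: contribution to reward_buf (2.0 per env)
  step_rate = with_dt / dt      # _step_reward always stores the unscaled rate raw * weight

  assert torch.allclose(with_dt, torch.full((num_envs,), 0.04))
  assert torch.allclose(step_rate, without_dt)

Keeping _step_reward as the unscaled rate in both modes means the per-term values surfaced by get_active_iterable_terms() stay directly comparable whether or not dt scaling is enabled.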