Skip to content

Commit b171eb3

Browse files
corbt authored and claude committed
Add experiment 228 with learning rate warmup and cooldown
- Add warmup_length and cooldown_length fields to ProjectPolicyConfig
- Update train.py to use adjust_lr function with batch-specific learning rates
- Add experiment 228 with 20-step warmup and cooldown starting at step 20

This experiment will test whether warmup/cooldown improves training compared to our baseline constant learning rate approach.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 9466881 commit b171eb3

File tree

3 files changed

+19
-4
lines changed

3 files changed

+19
-4
lines changed

examples/art-e/all_experiments.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,4 +212,9 @@
212212

213213
models["227"] = models["008"].model_copy(deep=True)
214214
models["227"].name = "email-agent-227"
215-
models["220"].base_model = "willcb/Qwen3-14B"
215+
models["227"].base_model = "willcb/Qwen3-14B"
216+
217+
models["228"] = models["008"].model_copy(deep=True)
218+
models["228"].name = "email-agent-228"
219+
models["228"].config.warmup_length = 20
220+
models["228"].config.cooldown_length = -20

examples/art-e/art_e/project_types.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from pydantic import BaseModel
2-
from typing import Literal
2+
from typing import Literal, Union
33

44

55
class ProjectPolicyConfig(BaseModel):
@@ -13,6 +13,8 @@ class ProjectPolicyConfig(BaseModel):
1313
trajectories_per_group: int = 6
1414
groups_per_step: int = 1
1515
learning_rate: float = 1.2e-5
16+
warmup_length: Union[int, float] = 0
17+
cooldown_length: Union[int, float] = 0
1618
eval_steps: int = 30
1719
val_set_size: int = 100
1820
training_dataset_size: int = 4000

examples/art-e/art_e/train.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from art_e.data.query_iterators import load_synthetic_queries
88
from art_e.data.types_enron import SyntheticQuery
99
from art_e.data.local_email_db import generate_database
10-
from art.utils import iterate_dataset
10+
from art.utils import iterate_dataset, adjust_lr
1111
from art_e.project_types import ProjectPolicyConfig
1212
from art_e.evaluate.benchmark import benchmark_model
1313
import os
@@ -139,9 +139,17 @@ async def judge_after_each(
139139
)
140140
continue # Proceed to next batch/epoch without training.
141141

142+
# Calculate learning rate for this batch
143+
current_lr = adjust_lr(
144+
batch,
145+
learning_rate=model.config.learning_rate,
146+
warmup_length=model.config.warmup_length,
147+
cooldown_length=model.config.cooldown_length,
148+
)
149+
142150
await model.train(
143151
groups,
144-
config=art.TrainConfig(learning_rate=model.config.learning_rate),
152+
config=art.TrainConfig(learning_rate=current_lr),
145153
_config=art.dev.TrainConfig(
146154
allow_training_without_logprobs=model.config.messages_only,
147155
precalculate_logprobs=model.config.precalculate_logprobs,

0 commit comments

Comments (0)