diff --git a/tests/experimental/test_gold_trainer.py b/tests/experimental/test_gold_trainer.py
index 619e0182626..f216badf6e3 100644
--- a/tests/experimental/test_gold_trainer.py
+++ b/tests/experimental/test_gold_trainer.py
@@ -486,7 +486,9 @@ def test_gold_trainer_init_defaults_vllm_max_model_length_to_max_length(monkeypa
 
 
 class DummyStudentModel:
     def __init__(self):
-        self.config = SimpleNamespace(_name_or_path="student", vocab_size=17)
+        config = SimpleNamespace(_name_or_path="student", vocab_size=17)
+        config.get_text_config = lambda: config
+        self.config = config
         self.generation_config = SimpleNamespace(eos_token_id=2)
         self.name_or_path = "student"
diff --git a/trl/experimental/distillation/distillation_trainer.py b/trl/experimental/distillation/distillation_trainer.py
index a4879f62b19..6134c7957e5 100644
--- a/trl/experimental/distillation/distillation_trainer.py
+++ b/trl/experimental/distillation/distillation_trainer.py
@@ -549,7 +549,7 @@ def __init__(
         # ── Prepare teacher model (after super().__init__ so accelerator is ready) ──
         if teacher_model is not None:
             if self._local_teacher_tokenizer_matches_student:
-                teacher_model.resize_token_embeddings(self.model.config.vocab_size)
+                teacher_model.resize_token_embeddings(self.model.config.get_text_config().vocab_size)
             if self.is_deepspeed_enabled:
                 self.teacher_model = prepare_deepspeed(teacher_model, self.accelerator)
             else:
diff --git a/trl/experimental/gold/gold_trainer.py b/trl/experimental/gold/gold_trainer.py
index 9f36d610612..d1b8da0df76 100644
--- a/trl/experimental/gold/gold_trainer.py
+++ b/trl/experimental/gold/gold_trainer.py
@@ -845,7 +845,7 @@ def __init__(
         if args.disable_dropout:
             disable_dropout_in_model(self.model)
         if not args.use_uld_loss:
-            teacher_model.resize_token_embeddings(self.model.config.vocab_size)
+            teacher_model.resize_token_embeddings(self.model.config.get_text_config().vocab_size)
         if self.is_deepspeed_enabled:
             self.teacher_model = prepare_deepspeed(teacher_model, self.accelerator)
 