From 8852bd22e4e5ad67b316e15ece09e921cb1dba39 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 16 Apr 2026 09:46:04 +0200 Subject: [PATCH 01/15] Set tokenizer attribute in OnlineDPO --- .../online_dpo/online_dpo_trainer.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/trl/experimental/online_dpo/online_dpo_trainer.py b/trl/experimental/online_dpo/online_dpo_trainer.py index 57e8701bd70..19da0453029 100644 --- a/trl/experimental/online_dpo/online_dpo_trainer.py +++ b/trl/experimental/online_dpo/online_dpo_trainer.py @@ -349,18 +349,18 @@ def __init__( # Handle pad token for processors or tokenizers if isinstance(processing_class, ProcessorMixin): - tokenizer = processing_class.tokenizer + self._tokenizer = processing_class.tokenizer elif isinstance(processing_class, PreTrainedTokenizerBase): - tokenizer = processing_class + self._tokenizer = processing_class else: raise TypeError("The `processing_class` must be either a `PreTrainedTokenizerBase` or a `ProcessorMixin`") - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token + if self._tokenizer.pad_token is None: + self._tokenizer.pad_token = self._tokenizer.eos_token - self.pad_token = tokenizer.pad_token - self.pad_token_id = tokenizer.pad_token_id - self.eos_token_id = tokenizer.eos_token_id + self.pad_token = self._tokenizer.pad_token + self.pad_token_id = self._tokenizer.pad_token_id + self.eos_token_id = self._tokenizer.eos_token_id # Vision tokens for VLM support self.image_token_id = getattr(processing_class, "image_token_id", None) @@ -369,7 +369,7 @@ def __init__( # Get the image token string for token collapsing self.image_token = None if self.image_token_id is not None: - self.image_token = tokenizer.decode([self.image_token_id]) + self.image_token = self._tokenizer.decode([self.image_token_id]) # Define the collator if not provided if data_collator is None: @@ -507,7 +507,7 @@ def __init__( "max_new_tokens": args.max_new_tokens, "do_sample": True, "pad_token_id": self.pad_token_id, - "bos_token_id": tokenizer.bos_token_id, + "bos_token_id": self._tokenizer.bos_token_id, "eos_token_id": self.eos_token_id, "temperature": self.temperature, "top_k": self.top_k, From ee2fd2dbec2a5623f4b1cdcb67d706ea86acc618 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 16 Apr 2026 11:00:05 +0200 Subject: [PATCH 02/15] Replace self.pad_token_id and self.eos_token_id --- .../online_dpo/online_dpo_trainer.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/trl/experimental/online_dpo/online_dpo_trainer.py b/trl/experimental/online_dpo/online_dpo_trainer.py index 19da0453029..659e01880f6 100644 --- a/trl/experimental/online_dpo/online_dpo_trainer.py +++ b/trl/experimental/online_dpo/online_dpo_trainer.py @@ -359,8 +359,6 @@ def __init__( self._tokenizer.pad_token = self._tokenizer.eos_token self.pad_token = self._tokenizer.pad_token - self.pad_token_id = self._tokenizer.pad_token_id - self.eos_token_id = self._tokenizer.eos_token_id # Vision tokens for VLM support self.image_token_id = getattr(processing_class, "image_token_id", None) @@ -373,7 +371,7 @@ def __init__( # Define the collator if not provided if data_collator is None: - data_collator = DPODataCollatorWithPadding(pad_token_id=self.pad_token_id) + data_collator = DPODataCollatorWithPadding(pad_token_id=self._tokenizer.pad_token_id) # Transformers explicitly set use_reentrant=True in the past to silence a PyTorch warning, but the default was # never updated once PyTorch switched to recommending use_reentrant=False. Until that change lands upstream @@ -506,9 +504,9 @@ def __init__( generation_kwargs = { "max_new_tokens": args.max_new_tokens, "do_sample": True, - "pad_token_id": self.pad_token_id, + "pad_token_id": self._tokenizer.pad_token_id, "bos_token_id": self._tokenizer.bos_token_id, - "eos_token_id": self.eos_token_id, + "eos_token_id": self._tokenizer.eos_token_id, "temperature": self.temperature, "top_k": self.top_k, "top_p": self.top_p, @@ -584,8 +582,8 @@ def _enable_gradient_checkpointing(self, model: PreTrainedModel, args: OnlineDPO return model def _generate_vllm(self, prompts, images=None): - eos_token_id = self.eos_token_id - pad_token_id = self.pad_token_id + eos_token_id = self._tokenizer.eos_token_id + pad_token_id = self._tokenizer.pad_token_id # Generate completion_ids and prompt_ids based on mode if self.vllm_mode == "server": @@ -894,8 +892,8 @@ def process_vision_row( def _generate(self, model, prompts, images=None): """Generate completions using the model""" device = next(model.parameters()).device - eos_token_id = self.eos_token_id - pad_token_id = self.pad_token_id + eos_token_id = self._tokenizer.eos_token_id + pad_token_id = self._tokenizer.pad_token_id # Apply chat template and tokenize the input inputs = [{"prompt": prompt} for prompt in prompts] @@ -1119,7 +1117,7 @@ def training_step( else: prompt_ids, prompt_mask, completion_ids, completion_mask = self._generate(model, prompts, images) - contain_eos_token = torch.any(completion_ids == self.eos_token_id, dim=-1) + contain_eos_token = torch.any(completion_ids == self._tokenizer.eos_token_id, dim=-1) # Extract vision inputs if available for VLM support vision_inputs = None From 5375253b6853bbbe55680dae3d679bf633dad1b6 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 16 Apr 2026 11:04:24 +0200 Subject: [PATCH 03/15] Use self._tokenizer --- trl/experimental/online_dpo/online_dpo_trainer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/trl/experimental/online_dpo/online_dpo_trainer.py b/trl/experimental/online_dpo/online_dpo_trainer.py index 659e01880f6..13cc70f5cc7 100644 --- a/trl/experimental/online_dpo/online_dpo_trainer.py +++ b/trl/experimental/online_dpo/online_dpo_trainer.py @@ -922,9 +922,7 @@ def _generate(self, model, prompts, images=None): else: # If the chat template doesn't use the image token, remove all instances if self.vision_end_token_id is not None: - escaped_eoi_token = re.escape( - self.processing_class.tokenizer.decode([self.vision_end_token_id]) - ) + escaped_eoi_token = re.escape(self._tokenizer.decode([self.vision_end_token_id])) prompts_text = [ re.sub(rf"({escaped_img_token})+{escaped_eoi_token}", "", text) for text in prompts_text ] From 9c535e01fc62f4479e9371a79d3c2cab519c973a Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Apr 2026 11:45:18 +0200 Subject: [PATCH 04/15] Set tokenizer attribute in SDFT --- trl/experimental/sdft/sdft_trainer.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/trl/experimental/sdft/sdft_trainer.py b/trl/experimental/sdft/sdft_trainer.py index 5bf6095c2a0..c75a205bbcc 100644 --- a/trl/experimental/sdft/sdft_trainer.py +++ b/trl/experimental/sdft/sdft_trainer.py @@ -209,17 +209,17 @@ def __init__( ) if isinstance(processing_class, ProcessorMixin): - tokenizer = processing_class.tokenizer + self._tokenizer = processing_class.tokenizer elif isinstance(processing_class, PreTrainedTokenizerBase): - tokenizer = processing_class + self._tokenizer = processing_class else: raise TypeError("The `processing_class` must be either a `PreTrainedTokenizerBase` or a `ProcessorMixin`") - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token + if self._tokenizer.pad_token is None: + self._tokenizer.pad_token = self._tokenizer.eos_token - self.pad_token_id = tokenizer.pad_token_id - self.eos_token_id = tokenizer.eos_token_id + self.pad_token_id = self._tokenizer.pad_token_id + self.eos_token_id = self._tokenizer.eos_token_id self.max_prompt_length = args.max_prompt_length self.max_completion_length = args.max_completion_length self.num_generations = args.num_generations @@ -239,9 +239,9 @@ def __init__( generation_kwargs = { "max_new_tokens": self.max_completion_length, "do_sample": True, - "pad_token_id": tokenizer.pad_token_id, - "bos_token_id": tokenizer.bos_token_id, - "eos_token_id": tokenizer.eos_token_id, + "pad_token_id": self._tokenizer.pad_token_id, + "bos_token_id": self._tokenizer.bos_token_id, + "eos_token_id": self._tokenizer.eos_token_id, "temperature": args.temperature, "top_p": args.top_p, "top_k": args.top_k, From 0f85a792d8a61afebcf3a2833f1453329707a5ec Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Apr 2026 11:47:19 +0200 Subject: [PATCH 05/15] Set tokenizer attribute in SelfDistillation --- .../base_self_distillation_trainer.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/trl/experimental/self_distillation/base_self_distillation_trainer.py b/trl/experimental/self_distillation/base_self_distillation_trainer.py index bd9abb95164..e420fa9a9fe 100644 --- a/trl/experimental/self_distillation/base_self_distillation_trainer.py +++ b/trl/experimental/self_distillation/base_self_distillation_trainer.py @@ -113,17 +113,17 @@ def __init__( ) if isinstance(processing_class, ProcessorMixin): - tokenizer = processing_class.tokenizer + self._tokenizer = processing_class.tokenizer elif isinstance(processing_class, PreTrainedTokenizerBase): - tokenizer = processing_class + self._tokenizer = processing_class else: raise TypeError("The `processing_class` must be either a `PreTrainedTokenizerBase` or a `ProcessorMixin`") - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token + if self._tokenizer.pad_token is None: + self._tokenizer.pad_token = self._tokenizer.eos_token - self.pad_token_id = tokenizer.pad_token_id - self.eos_token_id = tokenizer.eos_token_id + self.pad_token_id = self._tokenizer.pad_token_id + self.eos_token_id = self._tokenizer.eos_token_id self.temperature = args.temperature self.max_prompt_length = args.max_prompt_length self.max_completion_length = args.max_completion_length @@ -151,9 +151,9 @@ def __init__( generation_kwargs = { "max_new_tokens": self.max_completion_length, "do_sample": True, - "pad_token_id": tokenizer.pad_token_id, - "bos_token_id": tokenizer.bos_token_id, - "eos_token_id": tokenizer.eos_token_id, + "pad_token_id": self._tokenizer.pad_token_id, + "bos_token_id": self._tokenizer.bos_token_id, + "eos_token_id": self._tokenizer.eos_token_id, "temperature": args.temperature, "top_p": args.top_p, "top_k": args.top_k, From a3554386f9f76139328b2f9a56f7f3471a8b306c Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Apr 2026 11:48:30 +0200 Subject: [PATCH 06/15] Set tokenizer attribute in SSD --- trl/experimental/ssd/ssd_trainer.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/trl/experimental/ssd/ssd_trainer.py b/trl/experimental/ssd/ssd_trainer.py index ea378753653..392be9478c2 100644 --- a/trl/experimental/ssd/ssd_trainer.py +++ b/trl/experimental/ssd/ssd_trainer.py @@ -149,17 +149,17 @@ def __init__( ) if isinstance(processing_class, ProcessorMixin): - tokenizer = processing_class.tokenizer + self._tokenizer = processing_class.tokenizer elif isinstance(processing_class, PreTrainedTokenizerBase): - tokenizer = processing_class + self._tokenizer = processing_class else: raise TypeError("The `processing_class` must be either a `PreTrainedTokenizerBase` or a `ProcessorMixin`") - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token + if self._tokenizer.pad_token is None: + self._tokenizer.pad_token = self._tokenizer.eos_token - self.pad_token_id = tokenizer.pad_token_id - self.eos_token_id = tokenizer.eos_token_id + self.pad_token_id = self._tokenizer.pad_token_id + self.eos_token_id = self._tokenizer.eos_token_id self.max_prompt_length = args.max_prompt_length self.max_completion_length = args.max_completion_length # SSD always samples a single completion per prompt (N=1 in the paper). @@ -177,9 +177,9 @@ def __init__( generation_kwargs = { "max_new_tokens": self.max_completion_length, "do_sample": True, - "pad_token_id": tokenizer.pad_token_id, - "bos_token_id": tokenizer.bos_token_id, - "eos_token_id": tokenizer.eos_token_id, + "pad_token_id": self._tokenizer.pad_token_id, + "bos_token_id": self._tokenizer.bos_token_id, + "eos_token_id": self._tokenizer.eos_token_id, "temperature": args.temperature, "top_p": args.top_p, "top_k": args.top_k, From 4e8448b20309660cc074950eb2d05023ead3c374 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Apr 2026 11:50:01 +0200 Subject: [PATCH 07/15] Set tokenizer attribute in TPO --- trl/experimental/tpo/tpo_trainer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/trl/experimental/tpo/tpo_trainer.py b/trl/experimental/tpo/tpo_trainer.py index 544c0674cca..fc5ae52c2ac 100644 --- a/trl/experimental/tpo/tpo_trainer.py +++ b/trl/experimental/tpo/tpo_trainer.py @@ -345,10 +345,10 @@ def __init__( "The `processing_class` must be a `PreTrainedTokenizerBase`. `TPOTrainer` does not currently " "support vision-language models." ) - tokenizer = processing_class + self._tokenizer = processing_class - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token + if self._tokenizer.pad_token is None: + self._tokenizer.pad_token = self._tokenizer.eos_token if is_peft_available() and is_peft_model(model) and peft_config is not None: raise ValueError( @@ -371,7 +371,7 @@ def __init__( # each step. if data_collator is None: data_collator = DataCollatorForTriplePreference( - pad_token_id=tokenizer.pad_token_id, + pad_token_id=self._tokenizer.pad_token_id, max_length=args.max_length, truncation_mode=args.truncation_mode, pad_to_multiple_of=args.pad_to_multiple_of, From 4cec956ce617614865bd4fa4a5a384f5e23959a8 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Apr 2026 11:53:39 +0200 Subject: [PATCH 08/15] Replace self.pad_token_id and self.eos_token_id in SDFT --- trl/experimental/sdft/sdft_trainer.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/trl/experimental/sdft/sdft_trainer.py b/trl/experimental/sdft/sdft_trainer.py index c75a205bbcc..652524f6aed 100644 --- a/trl/experimental/sdft/sdft_trainer.py +++ b/trl/experimental/sdft/sdft_trainer.py @@ -218,8 +218,6 @@ def __init__( if self._tokenizer.pad_token is None: self._tokenizer.pad_token = self._tokenizer.eos_token - self.pad_token_id = self._tokenizer.pad_token_id - self.eos_token_id = self._tokenizer.eos_token_id self.max_prompt_length = args.max_prompt_length self.max_completion_length = args.max_completion_length self.num_generations = args.num_generations @@ -398,7 +396,7 @@ def _generate_completion_ids(self, prompts: list[Any]) -> tuple[torch.Tensor, to prompt_length = generate_inputs["input_ids"].size(1) completion_ids = prompt_completion_ids[:, prompt_length:] - is_eos = completion_ids == self.eos_token_id + is_eos = completion_ids == self._tokenizer.eos_token_id eos_idx = torch.full((is_eos.size(0),), is_eos.size(1), dtype=torch.long, device=completion_ids.device) eos_idx[is_eos.any(dim=1)] = is_eos.int().argmax(dim=1)[is_eos.any(dim=1)] seq_idx = torch.arange(is_eos.size(1), device=completion_ids.device).expand(is_eos.size(0), -1) @@ -408,7 +406,7 @@ def _generate_completion_ids(self, prompts: list[Any]) -> tuple[torch.Tensor, to completion_ids = [torch.tensor(ids, device=self.accelerator.device) for ids in completion_ids_list] completion_mask = [torch.ones_like(ids, dtype=torch.long) for ids in completion_ids] return ( - pad(completion_ids, padding_value=self.pad_token_id, padding_side="right"), + pad(completion_ids, padding_value=self._tokenizer.pad_token_id, padding_side="right"), pad(completion_mask, padding_value=0, padding_side="right"), ) From 7916f7778af83030c6e8af24071fa91c26dc59dc Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Apr 2026 11:58:20 +0200 Subject: [PATCH 09/15] Remove self.pad_token_id and self.eos_token_id in BaseSelfDistillation --- .../self_distillation/base_self_distillation_trainer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/trl/experimental/self_distillation/base_self_distillation_trainer.py b/trl/experimental/self_distillation/base_self_distillation_trainer.py index e420fa9a9fe..d4bea2f48d3 100644 --- a/trl/experimental/self_distillation/base_self_distillation_trainer.py +++ b/trl/experimental/self_distillation/base_self_distillation_trainer.py @@ -122,8 +122,6 @@ def __init__( if self._tokenizer.pad_token is None: self._tokenizer.pad_token = self._tokenizer.eos_token - self.pad_token_id = self._tokenizer.pad_token_id - self.eos_token_id = self._tokenizer.eos_token_id self.temperature = args.temperature self.max_prompt_length = args.max_prompt_length self.max_completion_length = args.max_completion_length From 3204efbb3ae4ed81325bafeb866cbd57a9a58c29 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Apr 2026 11:58:56 +0200 Subject: [PATCH 10/15] Replace self.pad_token_id in SDPO SuccessfulRolloutTeacherContextBuilder --- trl/experimental/sdpo/sdpo_trainer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/trl/experimental/sdpo/sdpo_trainer.py b/trl/experimental/sdpo/sdpo_trainer.py index ef84a17a44c..66195da4c58 100644 --- a/trl/experimental/sdpo/sdpo_trainer.py +++ b/trl/experimental/sdpo/sdpo_trainer.py @@ -87,7 +87,9 @@ def _tokenize_teacher_messages( teacher_prompt_ids = [ids.to(device) for ids in teacher_prompt_ids_list] teacher_prompt_mask = [torch.ones(len(ids), dtype=torch.long, device=device) for ids in teacher_prompt_ids] return TokenizedPromptBatch( - prompt_ids=pad(teacher_prompt_ids, padding_value=self.trainer.pad_token_id, padding_side="left"), + prompt_ids=pad( + teacher_prompt_ids, padding_value=self.trainer._tokenizer.pad_token_id, padding_side="left" + ), prompt_mask=pad(teacher_prompt_mask, padding_value=0, padding_side="left"), ) @@ -115,7 +117,7 @@ def build( # Use separate variables so the original completion_ids/completion_mask stay unpadded for the # teacher concat (they must match the student's sequence length for logits_to_keep alignment). padded_completion_ids = self.trainer.accelerator.pad_across_processes( - completion_ids, dim=1, pad_index=self.trainer.pad_token_id + completion_ids, dim=1, pad_index=self.trainer._tokenizer.pad_token_id ) all_completion_ids = self.trainer.accelerator.gather(padded_completion_ids) all_prompts = gather_object(prompts) @@ -193,7 +195,7 @@ def build( if demo_idx is None: raise RuntimeError("Expected a successful demonstration index for an active SDPO teacher prompt.") demo_ids = all_completion_ids[demo_idx] - demo_ids = demo_ids[demo_ids != self.trainer.processing_class.pad_token_id] + demo_ids = demo_ids[demo_ids != self.trainer._tokenizer.pad_token_id] demo_text = self.trainer.processing_class.decode(demo_ids, skip_special_tokens=True) if self.trainer.args.remove_thinking_from_demonstration: From 6d8a23facf96e2267596f522fc7fbb9a1b3ea494 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Apr 2026 12:01:00 +0200 Subject: [PATCH 11/15] Replace self.pad_token_id and self.eos_token_id in SSD --- trl/experimental/ssd/ssd_trainer.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/trl/experimental/ssd/ssd_trainer.py b/trl/experimental/ssd/ssd_trainer.py index 392be9478c2..ab70f7dad9c 100644 --- a/trl/experimental/ssd/ssd_trainer.py +++ b/trl/experimental/ssd/ssd_trainer.py @@ -158,8 +158,6 @@ def __init__( if self._tokenizer.pad_token is None: self._tokenizer.pad_token = self._tokenizer.eos_token - self.pad_token_id = self._tokenizer.pad_token_id - self.eos_token_id = self._tokenizer.eos_token_id self.max_prompt_length = args.max_prompt_length self.max_completion_length = args.max_completion_length # SSD always samples a single completion per prompt (N=1 in the paper). @@ -362,7 +360,7 @@ def _generate_completion_ids_vllm(self, prompts: list[Any]) -> tuple[torch.Tenso completion_ids = [torch.tensor(ids, device=device) for ids in completion_ids_list] completion_mask = [torch.ones(len(ids), dtype=torch.long, device=device) for ids in completion_ids_list] return ( - pad(completion_ids, padding_value=self.pad_token_id, padding_side="right"), + pad(completion_ids, padding_value=self._tokenizer.pad_token_id, padding_side="right"), pad(completion_mask, padding_value=0, padding_side="right"), ) @@ -394,7 +392,7 @@ def _generate_completion_ids_transformers(self, prompts: list[Any]) -> tuple[tor prompt_length = generate_inputs["input_ids"].size(1) completion_ids = prompt_completion_ids[:, prompt_length:] - is_eos = completion_ids == self.eos_token_id + is_eos = completion_ids == self._tokenizer.eos_token_id eos_idx = torch.full((is_eos.size(0),), is_eos.size(1), dtype=torch.long, device=completion_ids.device) eos_idx[is_eos.any(dim=1)] = is_eos.int().argmax(dim=1)[is_eos.any(dim=1)] seq_idx = torch.arange(is_eos.size(1), device=completion_ids.device).expand(is_eos.size(0), -1) @@ -404,7 +402,7 @@ def _generate_completion_ids_transformers(self, prompts: list[Any]) -> tuple[tor completion_ids = [torch.tensor(ids, device=self.accelerator.device) for ids in completion_ids_list] completion_mask = [torch.ones_like(ids, dtype=torch.long) for ids in completion_ids] return ( - pad(completion_ids, padding_value=self.pad_token_id, padding_side="right"), + pad(completion_ids, padding_value=self._tokenizer.pad_token_id, padding_side="right"), pad(completion_mask, padding_value=0, padding_side="right"), ) From 09b0a4a8e5e751a3c66dea85738dd72bf6130386 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Apr 2026 15:16:50 +0200 Subject: [PATCH 12/15] Replace self.pad_token_id and self.eos_token_id in OnlineRolloutMixin --- .../self_distillation/online_rollout_mixin.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/trl/experimental/self_distillation/online_rollout_mixin.py b/trl/experimental/self_distillation/online_rollout_mixin.py index 490724582dc..93caf5e2eeb 100644 --- a/trl/experimental/self_distillation/online_rollout_mixin.py +++ b/trl/experimental/self_distillation/online_rollout_mixin.py @@ -110,7 +110,7 @@ def _generate_transformers(self, prompts): prompt_mask = generate_inputs["attention_mask"] prompt_length = prompt_ids.size(1) completion_ids = prompt_completion_ids[:, prompt_length:] - is_eos = completion_ids == self.eos_token_id + is_eos = completion_ids == self._tokenizer.eos_token_id eos_idx = torch.full((is_eos.size(0),), is_eos.size(1), dtype=torch.long, device=completion_ids.device) eos_idx[is_eos.any(dim=1)] = is_eos.int().argmax(dim=1)[is_eos.any(dim=1)] seq_idx = torch.arange(is_eos.size(1), device=completion_ids.device).expand(is_eos.size(0), -1) @@ -173,15 +173,17 @@ def _generate_and_score_completions(self, inputs): prompt_ids = [torch.tensor(ids) for ids in prompt_ids_list] prompt_mask = [torch.ones_like(ids, dtype=torch.long) for ids in prompt_ids] - prompt_ids = pad(prompt_ids, padding_value=self.pad_token_id, padding_side="left").to(device=device) + prompt_ids = pad(prompt_ids, padding_value=self._tokenizer.pad_token_id, padding_side="left").to(device=device) prompt_mask = pad(prompt_mask, padding_value=0, padding_side="left").to(device=device) completion_ids = [torch.tensor(ids) for ids in completion_ids_list] completion_mask = [torch.ones_like(ids, dtype=torch.long) for ids in completion_ids] - completion_ids = pad(completion_ids, padding_value=self.pad_token_id, padding_side="right").to(device=device) + completion_ids = pad(completion_ids, padding_value=self._tokenizer.pad_token_id, padding_side="right").to( + device=device + ) completion_mask = pad(completion_mask, padding_value=0, padding_side="right").to(device=device) if self.mask_truncated_completions: - eos_and_pad = [self.eos_token_id, self.pad_token_id] + eos_and_pad = [self._tokenizer.eos_token_id, self._tokenizer.pad_token_id] is_truncated = torch.tensor([ids[-1] not in eos_and_pad for ids in completion_ids_list], device=device) completion_mask = completion_mask * (~is_truncated).unsqueeze(1).int() @@ -240,7 +242,7 @@ def _generate_and_score_completions(self, inputs): self._metrics[mode]["completions/min_length"].append(agg_completion_lengths.float().min().item()) self._metrics[mode]["completions/max_length"].append(agg_completion_lengths.float().max().item()) - eos_and_pad = [self.eos_token_id, self.pad_token_id] + eos_and_pad = [self._tokenizer.eos_token_id, self._tokenizer.pad_token_id] is_truncated = torch.tensor([ids[-1] not in eos_and_pad for ids in completion_ids_list], device=device) agg_is_truncated = self.accelerator.gather(is_truncated) self._metrics[mode]["completions/clipped_ratio"].append(agg_is_truncated.float().mean().item()) From 8a81c5d2c50aa2ff3986115d67081d15e64399ed Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Apr 2026 15:19:02 +0200 Subject: [PATCH 13/15] Use self._tokenizer in PromptTokenizer --- trl/experimental/self_distillation/teacher_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trl/experimental/self_distillation/teacher_context.py b/trl/experimental/self_distillation/teacher_context.py index 5e1020c91a7..2448b78a712 100644 --- a/trl/experimental/self_distillation/teacher_context.py +++ b/trl/experimental/self_distillation/teacher_context.py @@ -80,6 +80,6 @@ def tokenize_prompts(self, prompts: list[Any]) -> TokenizedPromptBatch: prompt_ids = [torch.tensor(ids, device=self.trainer.accelerator.device) for ids in prompt_ids] prompt_mask = [torch.ones_like(ids, dtype=torch.long) for ids in prompt_ids] return TokenizedPromptBatch( - prompt_ids=pad(prompt_ids, padding_value=self.trainer.pad_token_id, padding_side="left"), + prompt_ids=pad(prompt_ids, padding_value=self.trainer._tokenizer.pad_token_id, padding_side="left"), prompt_mask=pad(prompt_mask, padding_value=0, padding_side="left"), ) From d335ec18e4752be63fccf801fde0528c1a60fb73 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 28 Apr 2026 11:18:12 +0200 Subject: [PATCH 14/15] Set tokenizer attribute in KTO --- trl/experimental/kto/kto_trainer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/trl/experimental/kto/kto_trainer.py b/trl/experimental/kto/kto_trainer.py index 02d58bcaa61..a7b42b1afa9 100644 --- a/trl/experimental/kto/kto_trainer.py +++ b/trl/experimental/kto/kto_trainer.py @@ -270,13 +270,13 @@ def __init__( if processing_class is None: processing_class = AutoProcessor.from_pretrained(get_config_model_id(model.config)) if isinstance(processing_class, ProcessorMixin): - tokenizer = processing_class.tokenizer + self._tokenizer = processing_class.tokenizer elif isinstance(processing_class, PreTrainedTokenizerBase): - tokenizer = processing_class + self._tokenizer = processing_class else: raise TypeError("The `processing_class` must be either a `PreTrainedTokenizerBase` or a `ProcessorMixin`") - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token + if self._tokenizer.pad_token is None: + self._tokenizer.pad_token = self._tokenizer.eos_token # PEFT if peft_config is not None: @@ -345,7 +345,7 @@ def __init__( if data_collator is None: data_collator = DataCollatorForUnpairedPreference( - pad_token_id=tokenizer.pad_token_id, + pad_token_id=self._tokenizer.pad_token_id, max_length=max_length, ) From ba22aad71471378c4a527a3ff43ec8dcd9a0e8c3 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 28 Apr 2026 11:24:23 +0200 Subject: [PATCH 15/15] Use self._tokenizer in _prepare_dataset --- trl/experimental/kto/kto_trainer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/trl/experimental/kto/kto_trainer.py b/trl/experimental/kto/kto_trainer.py index a7b42b1afa9..05f74a68a42 100644 --- a/trl/experimental/kto/kto_trainer.py +++ b/trl/experimental/kto/kto_trainer.py @@ -569,8 +569,6 @@ def _prepare_dataset( map_kwargs["desc"] = f"Unpairing {dataset_name} dataset" dataset = unpair_preference_dataset(dataset, **map_kwargs) - tokenizer = getattr(processing_class, "tokenizer", processing_class) - # Add EOS token if needed: non-conversational only first_example = next(iter(dataset)) if not is_conversational(first_example): @@ -582,7 +580,7 @@ def add_eos(example, eos_token): example["completion"] = example["completion"] + eos_token return example - dataset = dataset.map(add_eos, fn_kwargs={"eos_token": tokenizer.eos_token}, **map_kwargs) + dataset = dataset.map(add_eos, fn_kwargs={"eos_token": self._tokenizer.eos_token}, **map_kwargs) # Tokenize dataset if isinstance(dataset, Dataset): # `IterableDataset.map` does not support `desc`