huggingface · qgallouedec · Apr 15, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/trl/chat_template_utils.py b/trl/chat_template_utils.py
@@ -627,7 +627,7 @@ def _validate_tool_calls(tool_calls: list | None) -> None:
                 tool_call["arguments"] = {}
 
 
-def parse_response(processing_class: PreTrainedTokenizer | ProcessorMixin, ids: list[int]) -> dict:
+def parse_response(tokenizer: PreTrainedTokenizer, ids: list[int]) -> dict:
     r"""
     Parse a token sequence into structured response dictionaries with fallback handling.
 
@@ -637,11 +637,9 @@ def parse_response(processing_class: PreTrainedTokenizer | ProcessorMixin, ids:
     Also removes incorrectly appended EOS tokens from tool call content when present, and validates tool_calls to
     ensure all required fields exist.
 
-    For VLM processors, automatically uses the inner tokenizer for parsing.
-
     Args:
-        processing_class (`PreTrainedTokenizer` or VLM processor):
-            Tokenizer or processor with a `parse_response()` method (directly or via inner tokenizer).
+        tokenizer (`PreTrainedTokenizer`):
+            Tokenizer with a `parse_response()` method.
         ids (`list[int]`):
             List of token sequences.
 
@@ -662,8 +660,6 @@ def parse_response(processing_class: PreTrainedTokenizer | ProcessorMixin, ids:
     {'role': 'assistant', 'content': '', 'tool_calls': [{'type': 'function', 'function': {'name': 'multiply', 'arguments': {'a': 3, 'b': 4}}}]}
     ```
     """
-    # VLM processors don't have parse_response directly; use the inner tokenizer
-    tokenizer = getattr(processing_class, "tokenizer", processing_class)
     try:
         parsed = tokenizer.parse_response(ids)
         # Hotfix: remove incorrectly appended EOS token from tool calls

diff --git a/trl/experimental/dppo/dppo_trainer.py b/trl/experimental/dppo/dppo_trainer.py
@@ -592,9 +592,8 @@ async def _run_async_tools(async_coros):
                 completion_ids[idx_with_tool] = pct[prompt_length:] + post_tool_ids[idx]
 
             # Decode post-tool completions
-            post_tool_completions = [
-                parse_response(self.processing_class, ids) if ids else {} for ids in post_tool_ids
-            ]
+            tokenizer = self.processing_class.tokenizer if self._is_vlm else self.processing_class
+            post_tool_completions = [parse_response(tokenizer, ids) if ids else {} for ids in post_tool_ids]
 
             for idx in range(len(idxs_with_tool)):
                 idx_with_tool = idxs_with_tool[idx]
@@ -674,7 +673,7 @@ def _generate(self, prompts: list):
                 and hasattr(tokenizer, "response_schema")  # attribute not set by default for now
                 and tokenizer.response_schema is not None  # only works if the tokenizer has a schema
             ):
-                completions = [[parse_response(self.processing_class, ids)] for ids in completion_ids]
+                completions = [[parse_response(tokenizer, ids)] for ids in completion_ids]
             else:
                 contents = self.processing_class.batch_decode(completion_ids, skip_special_tokens=True)
                 completions = [[{"role": "assistant", "content": content}] for content in contents]

diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py
@@ -1657,10 +1657,9 @@ async def _run_async_tools(async_coros):
                 pct = prompt_completion_tool_ids[idx]  # = prompt-completion-tool
                 completion_ids[idx_with_tool] = pct[prompt_length:] + post_tool_ids[idx]
 
-            # Decode post-tool completions.
-            post_tool_completions = [
-                parse_response(self.processing_class, ids) if ids else {} for ids in post_tool_ids
-            ]
+            # Decode post-tool completions
+            tokenizer = self.processing_class.tokenizer if self._is_vlm else self.processing_class
+            post_tool_completions = [parse_response(tokenizer, ids) if ids else {} for ids in post_tool_ids]
 
             # Add post-tool completions to the existing completions
             for idx in range(len(idxs_with_tool)):
@@ -1716,7 +1715,7 @@ def _generate(self, prompts: list):
                 and hasattr(tokenizer, "response_schema")  # attribute not set by default for now
                 and tokenizer.response_schema is not None  # only works if the tokenizer has a schema
             ):
-                completions = [[parse_response(self.processing_class, ids)] for ids in completion_ids]
+                completions = [[parse_response(tokenizer, ids)] for ids in completion_ids]
             else:
                 contents = self.processing_class.batch_decode(completion_ids, skip_special_tokens=True)
                 completions = [[{"role": "assistant", "content": content}] for content in contents]