Regenerate modeling/processing from updated modular; fix copyright header

artemspector · claude · artemspector · commit 3bf488f2b977 · 2026-04-30T18:25:41.000+03:00
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/src/transformers/models/granite4_vision/modeling_granite4_vision.py b/src/transformers/models/granite4_vision/modeling_granite4_vision.py
@@ -30,7 +30,7 @@
 
 from ... import initialization as init
 from ...activations import ACT2FN
-from ...cache_utils import Cache, DynamicCache
+from ...cache_utils import Cache
 from ...generation import GenerationMixin
 from ...image_processing_utils import select_best_resolution
 from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub, use_kernelized_func
@@ -42,6 +42,7 @@
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, torch_compilable_check
 from ...utils.generic import maybe_autocast, merge_with_config_defaults
+from ...utils.output_capturing import capture_outputs
 from ..auto import AutoModel
 from .configuration_granite4_vision import Granite4VisionConfig, Granite4VisionTextConfig
 
@@ -80,7 +81,7 @@ class Granite4VisionCausalLMOutputWithPast(ModelOutput):
 
 
 @dataclass
-class Granite4VisionImageFeaturesOutput(ModelOutput):
+class Granite4VisionImageFeaturesOutput(BaseModelOutputWithPooling):
     """
     Output of `Granite4VisionModel.get_image_features`.
 
@@ -590,6 +591,7 @@ def __init__(self, config: Granite4VisionTextConfig):
         # Initialize weights and apply final processing
         self.post_init()
 
+    @capture_outputs
     @auto_docstring
     def forward(
         self,
@@ -618,9 +620,6 @@ def forward(
 
         inputs_embeds = inputs_embeds * self.embedding_multiplier
 
-        if use_cache and past_key_values is None:
-            past_key_values = DynamicCache(config=self.config)
-
         if position_ids is None:
             past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
             position_ids = (
@@ -916,6 +915,9 @@ def get_image_features(
         elif pixel_values.dim() != 4:
             raise ValueError(f"pixel_values of shape {pixel_values.shape}, expect to be of 4 or 5 dimensions")
 
+        output_hidden_states = kwargs.pop("output_hidden_states", None)
+        if output_hidden_states is None:
+            output_hidden_states = getattr(self.config, "output_hidden_states", False)
         vision_outputs = self.vision_tower(pixel_values, output_hidden_states=True, **kwargs)
 
         # Deepstack features: extract from multiple vision layers, downsample via interpolation
@@ -958,7 +960,10 @@ def get_image_features(
 
                 all_features.append((llm_layer, packed_group))
 
-        return Granite4VisionImageFeaturesOutput(deepstack_features=all_features)
+        return Granite4VisionImageFeaturesOutput(
+            deepstack_features=all_features,
+            hidden_states=vision_outputs.hidden_states if output_hidden_states else None,
+        )
 
     def get_placeholder_mask(
         self, input_ids: torch.LongTensor, inputs_embeds: torch.FloatTensor, image_features: torch.FloatTensor
diff --git a/src/transformers/models/granite4_vision/modular_granite4_vision.py b/src/transformers/models/granite4_vision/modular_granite4_vision.py
@@ -25,7 +25,7 @@
 from ...configuration_utils import PreTrainedConfig
 from ...image_processing_utils import select_best_resolution
 from ...masking_utils import create_causal_mask
-from ...modeling_outputs import BaseModelOutputWithPast, BaseModelOutputWithPooling, ModelOutput
+from ...modeling_outputs import BaseModelOutputWithPast, BaseModelOutputWithPooling
 from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
diff --git a/src/transformers/models/granite4_vision/processing_granite4_vision.py b/src/transformers/models/granite4_vision/processing_granite4_vision.py
@@ -4,7 +4,7 @@
 #             the file from the modular. If any change should be done, please apply the change to the
 #                          modular_granite4_vision.py file directly. One of our CI enforces this.
 #                🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
-# Copyright 2025 IBM. All rights reserved.
+# Copyright 2026 IBM and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,7 @@`
`4`	`4`	`# the file from the modular. If any change should be done, please apply the change to the`
`5`	`5`	`# modular_granite4_vision.py file directly. One of our CI enforces this.`
`6`	`6`	`# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨`
`7`		`-# Copyright 2025 IBM. All rights reserved.`
	`7`	`+# Copyright 2026 IBM and The HuggingFace Team. All rights reserved.`
`8`	`8`	`#`
`9`	`9`	`# Licensed under the Apache License, Version 2.0 (the "License");`
`10`	`10`	`# you may not use this file except in compliance with the License.`