
Commit 97a0414

shanjiaz committed (with Dipika Sikka and gemini-code-assist[bot])
make transformer fix backward compatible (#1794)
SUMMARY: Use similar logic in the code base to determine which kv parameter name to use.

TEST PLAN: Tested locally; kv cache tests pass : )

---------

Signed-off-by: shanjiaz <[email protected]>
Co-authored-by: Dipika Sikka <[email protected]>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 505ae88 commit 97a0414

File tree

1 file changed (+11 −1 lines changed)


src/llmcompressor/modifiers/quantization/calibration.py

Lines changed: 11 additions & 1 deletion
@@ -1,3 +1,4 @@
+import inspect
 from typing import Any, Dict, Optional, Tuple
 
 import torch
@@ -247,7 +248,16 @@ def calibrate_kv_cache_input_hook(
     kv_cache to singleton QuantizedKVParameterCache.
     """
     kv_cache = getattr(module, "kv_cache")
-    kwargs["past_key_values"] = kv_cache
+    if not hasattr(module, "_past_kv_name"):
+        # Determine which past KV parameter name to use once and cache it
+        # TODO: Find a better place to cache this
+        module._past_kv_name = (
+            "past_key_value"  # transformers#39956
+            if "past_key_value" in inspect.signature(module.forward).parameters
+            else "past_key_values"
+        )
+
+    kwargs[module._past_kv_name] = kv_cache
     kwargs["use_cache"] = False
     return args, kwargs
 
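The backward-compatibility trick in the diff above is to inspect the module's `forward` signature once and cache the result, since newer transformers versions (transformers#39956) renamed the `past_key_values` kwarg to `past_key_value` on attention modules. A minimal, standalone sketch of that detection logic (the `NewStyleAttention` and `OldStyleAttention` classes below are hypothetical stand-ins for transformers modules, not part of llm-compressor):

```python
import inspect


class NewStyleAttention:
    # Hypothetical module whose forward() uses the newer kwarg name.
    def forward(self, hidden_states, past_key_value=None, use_cache=False):
        return hidden_states


class OldStyleAttention:
    # Hypothetical module whose forward() still uses the older kwarg name.
    def forward(self, hidden_states, past_key_values=None, use_cache=False):
        return hidden_states


def past_kv_param_name(module):
    """Pick whichever past-KV kwarg name this module's forward() accepts."""
    params = inspect.signature(module.forward).parameters
    return "past_key_value" if "past_key_value" in params else "past_key_values"


print(past_kv_param_name(NewStyleAttention()))  # past_key_value
print(past_kv_param_name(OldStyleAttention()))  # past_key_values
```

Caching the answer on the module (as the patch does with `module._past_kv_name`) avoids re-running `inspect.signature` on every forward hook invocation, which matters because the hook fires once per calibration batch.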