From 94ea20ad41afe67df596bcd9c985f4adc0b4e8e0 Mon Sep 17 00:00:00 2001
From: MekkCyber <mekk.cyber@gmail.com>
Date: Thu, 10 Apr 2025 16:23:41 +0000
Subject: [PATCH] refix: apply require_read_token at QuarkTest class level

---
 tests/quantization/quark_integration/test_quark.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/quantization/quark_integration/test_quark.py b/tests/quantization/quark_integration/test_quark.py
index 81584fa02e0e..1747ec31f47b 100644
--- a/tests/quantization/quark_integration/test_quark.py
+++ b/tests/quantization/quark_integration/test_quark.py
@@ -43,6 +43,7 @@ def test_commmon_args(self):
 @slow
 @require_quark
 @require_torch_gpu
+@require_read_token
 class QuarkTest(unittest.TestCase):
     reference_model_name = "meta-llama/Llama-3.1-8B-Instruct"
     quantized_model_name = "amd/Llama-3.1-8B-Instruct-w-int8-a-int8-sym-test"
@@ -75,13 +76,11 @@ def setUpClass(cls):
             device_map=cls.device_map,
         )
 
-    @require_read_token
     def test_memory_footprint(self):
         mem_quantized = self.quantized_model.get_memory_footprint()
 
         self.assertTrue(self.mem_fp16 / mem_quantized > self.EXPECTED_RELATIVE_DIFFERENCE)
 
-    @require_read_token
     def test_device_and_dtype_assignment(self):
         r"""
         Test whether trying to cast (or assigning a device to) a model after quantization will throw an error.
@@ -95,7 +94,6 @@ def test_device_and_dtype_assignment(self):
             # Tries with a `dtype``
             self.quantized_model.to(torch.float16)
 
-    @require_read_token
     def test_original_dtype(self):
         r"""
         A simple test to check if the model succesfully stores the original dtype
@@ -106,7 +104,6 @@ def test_original_dtype(self):
 
         self.assertTrue(isinstance(self.quantized_model.model.layers[0].mlp.gate_proj, QParamsLinear))
 
-    @require_read_token
     def check_inference_correctness(self, model):
         r"""
         Test the generation quality of the quantized model and see that we are matching the expected output.
@@ -130,7 +127,6 @@ def check_inference_correctness(self, model):
         # Get the generation
         self.assertIn(self.tokenizer.decode(output_sequences[0], skip_special_tokens=True), self.EXPECTED_OUTPUTS)
 
-    @require_read_token
     def test_generate_quality(self):
         """
         Simple test to check the quality of the model by comparing the generated tokens with the expected tokens