diff --git a/.github/workflows/check_code_quality.yml b/.github/workflows/check_code_quality.yml index fe788bc34..7410502b0 100644 --- a/.github/workflows/check_code_quality.yml +++ b/.github/workflows/check_code_quality.yml @@ -48,4 +48,5 @@ jobs: - name: Check style with ruff run: | source venv/bin/activate + ruff format . --diff ruff check . diff --git a/Makefile b/Makefile index 12febb89c..c9444e23f 100644 --- a/Makefile +++ b/Makefile @@ -60,9 +60,11 @@ transformers_examples: # Run code quality checks style_check: ruff check . + ruff format . --diff style: ruff check . --fix + ruff format . # Utilities to release to PyPi build_dist_install_tools: diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py index bedf48ec9..8b7134647 100755 --- a/examples/language-modeling/run_clm.py +++ b/examples/language-modeling/run_clm.py @@ -462,7 +462,7 @@ def main(): model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code) n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values()) - logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") + logger.info(f"Training new model from scratch - Total size={n_params / 2**20:.2f}M params") # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # on a small vocab and want a smaller embedding size, remove this test. diff --git a/examples/question-answering/trainer_qa.py b/examples/question-answering/trainer_qa.py index 8243448a0..1e1119de3 100644 --- a/examples/question-answering/trainer_qa.py +++ b/examples/question-answering/trainer_qa.py @@ -15,6 +15,7 @@ """ A subclass of `Trainer` specific to Question-Answering tasks """ + import math import time diff --git a/examples/question-answering/trainer_seq2seq_qa.py b/examples/question-answering/trainer_seq2seq_qa.py index 6e04bf3f6..2a3dbe5ca 100644 --- a/examples/question-answering/trainer_seq2seq_qa.py +++ b/examples/question-answering/trainer_seq2seq_qa.py @@ -15,6 +15,7 @@ """ A subclass of `Trainer` specific to Question-Answering tasks """ + import math import time from typing import Dict, List, Optional diff --git a/examples/question-answering/utils_qa.py b/examples/question-answering/utils_qa.py index 23a46370d..79497dbb8 100644 --- a/examples/question-answering/utils_qa.py +++ b/examples/question-answering/utils_qa.py @@ -15,6 +15,7 @@ """ Post-processing utilities for question answering. """ + import collections import json import logging diff --git a/examples/summarization/run_summarization.py b/examples/summarization/run_summarization.py index 5a442c075..90be3c604 100755 --- a/examples/summarization/run_summarization.py +++ b/examples/summarization/run_summarization.py @@ -525,9 +525,9 @@ def main(): return if isinstance(tokenizer, tuple(MULTILINGUAL_TOKENIZERS)): - assert ( - data_args.lang is not None - ), f"{tokenizer.__class__.__name__} is a multilingual tokenizer which requires --lang argument" + assert data_args.lang is not None, ( + f"{tokenizer.__class__.__name__} is a multilingual tokenizer which requires --lang argument" + ) tokenizer.src_lang = data_args.lang tokenizer.tgt_lang = data_args.lang diff --git a/examples/text-classification/run_glue.py b/examples/text-classification/run_glue.py index 75b321be0..e9f1fb6f0 100755 --- a/examples/text-classification/run_glue.py +++ b/examples/text-classification/run_glue.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning the library models for sequence classification on GLUE.""" +"""Finetuning the library models for sequence classification on GLUE.""" # You can also adapt this script on your own text classification task. Pointers for this are left as comments. import logging @@ -158,9 +158,9 @@ def __post_init__(self): train_extension = self.train_file.split(".")[-1] assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file." validation_extension = self.validation_file.split(".")[-1] - assert ( - validation_extension == train_extension - ), "`validation_file` should have the same extension (csv or json) as `train_file`." + assert validation_extension == train_extension, ( + "`validation_file` should have the same extension (csv or json) as `train_file`." + ) @dataclass @@ -329,9 +329,9 @@ def main(): if data_args.test_file is not None: train_extension = data_args.train_file.split(".")[-1] test_extension = data_args.test_file.split(".")[-1] - assert ( - test_extension == train_extension - ), "`test_file` should have the same extension (csv or json) as `train_file`." + assert test_extension == train_extension, ( + "`test_file` should have the same extension (csv or json) as `train_file`." + ) data_files["test"] = data_args.test_file else: raise ValueError("Need either a GLUE task or a test file for `do_predict`.") diff --git a/examples/text-classification/run_xnli.py b/examples/text-classification/run_xnli.py index 4b06d2653..23f79a5bb 100755 --- a/examples/text-classification/run_xnli.py +++ b/examples/text-classification/run_xnli.py @@ -14,8 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning multi-lingual models on XNLI (e.g. Bert, DistilBERT, XLM). - Adapted from `examples/text-classification/run_glue.py`""" +"""Finetuning multi-lingual models on XNLI (e.g. Bert, DistilBERT, XLM). +Adapted from `examples/text-classification/run_glue.py`""" import logging import os diff --git a/optimum/exporters/neuron/__main__.py b/optimum/exporters/neuron/__main__.py index 3e2d00ef6..5a549a697 100644 --- a/optimum/exporters/neuron/__main__.py +++ b/optimum/exporters/neuron/__main__.py @@ -662,7 +662,7 @@ def main_export( ) logger.info( - f"The {NEURON_COMPILER} export succeeded and the exported model was saved at: " f"{output.as_posix()}" + f"The {NEURON_COMPILER} export succeeded and the exported model was saved at: {output.as_posix()}" ) except ShapeError as e: raise e @@ -678,8 +678,7 @@ def main_export( ) except Exception as e: logger.error( - f"An error occured with the error message: {e}.\n The exported model was saved at: " - f"{output.as_posix()}" + f"An error occurred with the error message: {e}.\n The exported model was saved at: {output.as_posix()}" ) diff --git a/optimum/exporters/neuron/config.py b/optimum/exporters/neuron/config.py index 82e842954..80958f70b 100644 --- a/optimum/exporters/neuron/config.py +++ b/optimum/exporters/neuron/config.py @@ -16,6 +16,7 @@ Common Neuron configuration classes that handle most of the features for building model specific configurations.
""" + from typing import List from ...utils import ( diff --git a/optimum/exporters/neuron/convert.py b/optimum/exporters/neuron/convert.py index b9b1a79c5..a30a7ba47 100644 --- a/optimum/exporters/neuron/convert.py +++ b/optimum/exporters/neuron/convert.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Neuron compiled model check and export functions.""" + import copy import time from collections import OrderedDict @@ -282,7 +283,7 @@ def validate_model_outputs( if shape_failures: msg = "\n".join(f"- {t[0]}: got {t[1]} (reference) and {t[2]} (neuron)" for t in shape_failures) - raise ShapeError("Output shapes do not match between reference model and the Neuron exported model:\n" "{msg}") + raise ShapeError("Output shapes do not match between reference model and the Neuron exported model:\n{msg}") if value_failures: msg = "\n".join(f"- {t[0]}: max diff = {t[1]}" for t in value_failures) diff --git a/optimum/exporters/neuron/model_configs/decoder_configs.py b/optimum/exporters/neuron/model_configs/decoder_configs.py index 30ddc808e..e2273610f 100644 --- a/optimum/exporters/neuron/model_configs/decoder_configs.py +++ b/optimum/exporters/neuron/model_configs/decoder_configs.py @@ -14,7 +14,6 @@ # limitations under the License. """Neuron export configurations for models using transformers_neuronx.""" - from optimum.exporters.tasks import TasksManager from ....neuron.models.granite.model import GraniteForSampling diff --git a/optimum/exporters/neuron/model_wrappers.py b/optimum/exporters/neuron/model_wrappers.py index f1fb2995e..2e1c15639 100644 --- a/optimum/exporters/neuron/model_wrappers.py +++ b/optimum/exporters/neuron/model_wrappers.py @@ -247,13 +247,13 @@ def forward(self, input_ids, attention_mask): batch_size = input_ids.shape[0] sequence_length = input_ids.shape[1] if self.sequence_length is not None: - assert ( - self.sequence_length - ), f"Different sequence length for the parallel partition({self.sequence_length}) and for dummy inputs({sequence_length}). Make sure that they have the same value." + assert self.sequence_length, ( + f"Different sequence length for the parallel partition({self.sequence_length}) and for dummy inputs({sequence_length}). Make sure that they have the same value." + ) if self.batch_size is not None: - assert ( - self.batch_size - ), f"Different batch size for the parallel partition({self.batch_size}) and for dummy inputs({batch_size}). Make sure that they have the same value." + assert self.batch_size, ( + f"Different batch size for the parallel partition({self.batch_size}) and for dummy inputs({batch_size}). Make sure that they have the same value." + ) encoder_output = self.model.encoder( input_ids=input_ids, attention_mask=attention_mask, output_attentions=False, output_hidden_states=False diff --git a/optimum/neuron/accelerate/utils/operations.py b/optimum/neuron/accelerate/utils/operations.py index 11345ca10..9e241dd6e 100644 --- a/optimum/neuron/accelerate/utils/operations.py +++ b/optimum/neuron/accelerate/utils/operations.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Custom operations related to accelerate for Neuron.""" - import torch from accelerate.utils.operations import recursively_apply diff --git a/optimum/neuron/distributed/encoder_decoder_models.py b/optimum/neuron/distributed/encoder_decoder_models.py index 0af70494c..b86df32a8 100644 --- a/optimum/neuron/distributed/encoder_decoder_models.py +++ b/optimum/neuron/distributed/encoder_decoder_models.py @@ -268,7 +268,7 @@ def project(hidden_states, proj_layer, key_value_states, past_key_value): if past_key_value is not None: if len(past_key_value) != 2: raise ValueError( - f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states" + f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states" ) real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length diff --git a/optimum/neuron/distributed/parallel_layers.py b/optimum/neuron/distributed/parallel_layers.py index bbba16c88..fd489e4eb 100644 --- a/optimum/neuron/distributed/parallel_layers.py +++ b/optimum/neuron/distributed/parallel_layers.py @@ -108,7 +108,7 @@ def prepare_parallel_layer_specific_kwargs(cls, **parallel_layer_specific_kwargs name for name in parallel_layer_specific_kwargs if name not in default_parallel_layer_specific_kwargs ] logger.debug( - f'The following arguments are not allowed for {cls.__name__}: {", ".join(wrong_argument_names)}, they ' + f"The following arguments are not allowed for {cls.__name__}: {', '.join(wrong_argument_names)}, they " "will be ignored." ) diff --git a/optimum/neuron/distributed/parallelizers_manager.py b/optimum/neuron/distributed/parallelizers_manager.py index 5f3cc2df3..a0a4fce89 100644 --- a/optimum/neuron/distributed/parallelizers_manager.py +++ b/optimum/neuron/distributed/parallelizers_manager.py @@ -28,7 +28,7 @@ def parallelizer_classes_resolver( - model_type_to_parallelizer_class_name: Dict[str, str] + model_type_to_parallelizer_class_name: Dict[str, str], ) -> Dict[str, Type[Parallelizer]]: modules = [] for module_name in _PARALLELIZER_CLASSES_MODULE_NAMES: diff --git a/optimum/neuron/modeling.py b/optimum/neuron/modeling.py index fa5681d26..7edbf1c61 100644 --- a/optimum/neuron/modeling.py +++ b/optimum/neuron/modeling.py @@ -179,9 +179,7 @@ def forward( # last_hidden_state -> (batch_size, sequencen_len, hidden_size) last_hidden_state = self.remove_padding( [outputs[0]], dims=[0, 1], indices=[input_ids.shape[0], input_ids.shape[1]] - )[ - 0 - ] # Remove padding on batch_size(0), and sequence_length(1) + )[0] # Remove padding on batch_size(0), and sequence_length(1) if len(outputs) > 1: # pooler_output -> (batch_size, hidden_size) pooler_output = self.remove_padding([outputs[1]], dims=[0], indices=[input_ids.shape[0]])[ @@ -264,9 +262,7 @@ def forward( # token_embeddings -> (batch_size, sequencen_len, hidden_size) token_embeddings = self.remove_padding( [outputs[0]], dims=[0, 1], indices=[input_ids.shape[0], input_ids.shape[1]] - )[ - 0 - ] # Remove padding on batch_size(0), and sequence_length(1) + )[0] # Remove padding on batch_size(0), and sequence_length(1) # sentence_embedding -> (batch_size, hidden_size) sentence_embedding = self.remove_padding([outputs[1]], dims=[0], indices=[input_ids.shape[0]])[ 0 diff --git a/optimum/neuron/modeling_diffusion.py b/optimum/neuron/modeling_diffusion.py index 53f2df76e..944d76ebe 100644 --- a/optimum/neuron/modeling_diffusion.py +++ b/optimum/neuron/modeling_diffusion.py @@ -1183,7 +1183,9 @@ def forward( if output_hidden_states: assert 
( self.config.output_hidden_states or self.config.neuron.get("output_hidden_states") - ) == output_hidden_states, "output_hidden_states is expected to be False since the model was compiled without hidden_states as output." + ) == output_hidden_states, ( + "output_hidden_states is expected to be False since the model was compiled without hidden_states as output." + ) input_ids = input_ids.to(torch.long) # dummy generator uses long int for tracing inputs = (input_ids,) diff --git a/optimum/neuron/models/granite/hlo.py b/optimum/neuron/models/granite/hlo.py index d66f12b8d..59330b438 100644 --- a/optimum/neuron/models/granite/hlo.py +++ b/optimum/neuron/models/granite/hlo.py @@ -35,7 +35,6 @@ def scale_mul(t, scale): class GraniteForSamplingNoEmbeddingHlo: - def __init__(self, config: GraniteConfig, neuron_config: Optional[NeuronConfig] = None): self.config = config self.neuron_config = neuron_config @@ -324,9 +323,9 @@ def layer( tp_degree=self.config.tp_degree, ) if self.neuron_config.fuse_mlp: - assert all( - (not (x) for x in [in0_weight, in1_weight, out_weight, in0_scales, in1_scales, out_scales]) - ), "in0, in1 and out weights have to be None" + assert all((not (x) for x in [in0_weight, in1_weight, out_weight, in0_scales, in1_scales, out_scales])), ( + "in0, in1 and out weights have to be None" + ) in0_weight, in0_scales = mlp_in_weight, mlp_in_scales out_weight, out_scales = mlp_out_weight, mlp_out_scales @@ -688,7 +687,6 @@ def attention( # Single Token Generation ("Prefetch"-style) ans speculative forward if active_mask is not None: - n_active_tokens = key.sizes[1] if bsh_cache_layout else key.sizes[0] if n_active_tokens > 1 and self.neuron_config and self.neuron_config.continuous_batching: # For speculative forward + continuous batching, slice out samples in the batch size diff --git a/optimum/neuron/models/granite/model.py b/optimum/neuron/models/granite/model.py index ddd3aecf2..7b706ef19 100644 --- a/optimum/neuron/models/granite/model.py +++ b/optimum/neuron/models/granite/model.py @@ -159,9 +159,9 @@ def load_weights(self): # Note: Automatic MLP padding is safe since zeros are *only* introduced to intermediary state if self.neuron_config.fuse_mlp: - assert all( - getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"] - ), "fuse_mlp need to have gate and up proj weights" + assert all(getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"]), ( + "fuse_mlp needs to have gate and up proj weights" + ) assert all( getattr(mlp, attr, None).weight.shape[0] % self.config.tp_degree == 0 for attr in ["gate_proj", "up_proj"] diff --git a/optimum/neuron/models/granite/modules.py b/optimum/neuron/models/granite/modules.py index 4cbbcc9f3..84b36a38d 100644 --- a/optimum/neuron/models/granite/modules.py +++ b/optimum/neuron/models/granite/modules.py @@ -18,7 +18,6 @@ class GraniteForCausalLM(module.PretrainedModel): - def __init__(self, config: GraniteConfig): super().__init__() dtype, _, _ = utils.parse_amp(config.amp) @@ -34,7 +33,6 @@ def get_base_model(self): class GraniteModel(module.LowMemoryModule): - def __init__(self, config: GraniteConfig): super().__init__() self.embed_tokens = module.LowMemoryEmbedding(config.vocab_size, config.hidden_size) @@ -45,14 +43,12 @@ def __init__(self, config: GraniteConfig): class GraniteRMSNorm(module.LowMemoryModule): - def __init__(self, config: GraniteConfig) -> None: super().__init__() self.weight = module.UninitializedParameter() class GraniteDecoderLayer(module.LowMemoryModule): - def __init__(self, config: GraniteConfig):
super().__init__() self.self_attn = GraniteAttention(config) @@ -62,7 +58,6 @@ def __init__(self, config: GraniteConfig): class GraniteAttention(module.LowMemoryModule): - def __init__(self, config: GraniteConfig): super().__init__() self.hidden_size = config.hidden_size @@ -77,7 +72,6 @@ def __init__(self, config: GraniteConfig): class GraniteMLP(module.LowMemoryModule): - def __init__(self, config: GraniteConfig): super().__init__() dtype, _, _ = utils.parse_amp(config.amp) diff --git a/optimum/neuron/models/qwen2/model.py b/optimum/neuron/models/qwen2/model.py index 8ee60d9b4..8396a8fba 100644 --- a/optimum/neuron/models/qwen2/model.py +++ b/optimum/neuron/models/qwen2/model.py @@ -156,9 +156,9 @@ def load_weights(self): # Note: Automatic MLP padding is safe since zeros are *only* introduced to intermediary state if self.neuron_config.fuse_mlp: - assert all( - getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"] - ), "fuse_mlp need to have gate and up proj weights" + assert all(getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"]), ( + "fuse_mlp needs to have gate and up proj weights" + ) assert all( getattr(mlp, attr, None).weight.shape[0] % self.config.tp_degree == 0 for attr in ["gate_proj", "up_proj"] diff --git a/optimum/neuron/utils/cache_utils.py b/optimum/neuron/utils/cache_utils.py index 28845713c..84a760988 100644 --- a/optimum/neuron/utils/cache_utils.py +++ b/optimum/neuron/utils/cache_utils.py @@ -62,7 +62,7 @@ def load_custom_cache_repo_name_from_hf_home( - hf_home_cache_repo_file: Union[str, Path] = HF_HOME_CACHE_REPO_FILE + hf_home_cache_repo_file: Union[str, Path] = HF_HOME_CACHE_REPO_FILE, ) -> Optional[str]: if Path(hf_home_cache_repo_file).exists(): with open(hf_home_cache_repo_file, "r") as fp: diff --git a/optimum/neuron/utils/peft_utils.py b/optimum/neuron/utils/peft_utils.py index 7780ff7ed..4866669ac 100644 --- a/optimum/neuron/utils/peft_utils.py +++ b/optimum/neuron/utils/peft_utils.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Utilities related to the PEFT library and support.""" + import collections import functools import os diff --git a/optimum/neuron/utils/version_utils.py b/optimum/neuron/utils/version_utils.py index 818e2bc1e..368c4d186 100644 --- a/optimum/neuron/utils/version_utils.py +++ b/optimum/neuron/utils/version_utils.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Version utilities.""" + from typing import Optional from packaging import version diff --git a/pyproject.toml b/pyproject.toml index 242fb9579..b7f0fb60d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,9 @@ ignore = ["C901", "E501", "E741", "W605"] select = ["C", "E", "F", "I", "W"] exclude = ["*.ipynb"] +[tool.ruff.format] +exclude = ["*.ipynb"] + # Ignore import violations in all `__init__.py` files.
[tool.ruff.lint.per-file-ignores] "__init__.py" = ["E402", "F401", "F403", "F811"] diff --git a/tests/cli/test_neuron_cache_cli.py b/tests/cli/test_neuron_cache_cli.py index 0d7887445..c8b6da574 100644 --- a/tests/cli/test_neuron_cache_cli.py +++ b/tests/cli/test_neuron_cache_cli.py @@ -50,18 +50,18 @@ def _optimum_neuron_cache_create(self, cache_repo_id: Optional[str] = None, publ try: repo_id = cache_repo_id if cache_repo_id is not None else CACHE_REPO_NAME info = HfApi().repo_info(repo_id, repo_type="model") - assert info.private == ( - not public - ), "The privacy of the repo should match the presence of the --public flag." + assert info.private == (not public), ( + "The privacy of the repo should match the presence of the --public flag." + ) except RepositoryNotFoundError: pytest.fail("The repo was not created.") finally: delete_repo(repo_id) - assert ( - repo_id == load_custom_cache_repo_name_from_hf_home() - ), f"Saved local Neuron cache name should be equal to {repo_id}." + assert repo_id == load_custom_cache_repo_name_from_hf_home(), ( + f"Saved local Neuron cache name should be equal to {repo_id}." + ) def test_optimum_neuron_cache_create_with_custom_name(self, hub_test): seed = random.randint(0, 100) @@ -79,9 +79,9 @@ def test_optimum_neuron_cache_set(self, hub_test): p = subprocess.Popen(command) returncode = p.wait() assert returncode == 0 - assert ( - repo_id == load_custom_cache_repo_name_from_hf_home() - ), f"Saved local Neuron cache name should be equal to {repo_id}." + assert repo_id == load_custom_cache_repo_name_from_hf_home(), ( + f"Saved local Neuron cache name should be equal to {repo_id}." + ) def test_optimum_neuron_cache_add(self, hub_test): with TemporaryDirectory() as tmpdir: diff --git a/tests/test_generate.py b/tests/test_generate.py index 706e3538b..a4b054240 100644 --- a/tests/test_generate.py +++ b/tests/test_generate.py @@ -104,12 +104,12 @@ def test_greedy_decoding(self, model_name, use_cache, decoder_only, compiler_fla cpu_samples = _test_generative_decoding(model_name=model_name, device="cpu", decoder_only=decoder_only) - assert np.array_equal( - cpu_samples, xla_neuron_samples_fp32 - ), "XLA Neuron FP32 output doesn't match CPU only output" - assert np.array_equal( - cpu_samples, xla_neuron_samples_bf16 - ), "XLA Neuron bf16 output doesn't match CPU only output" + assert np.array_equal(cpu_samples, xla_neuron_samples_fp32), ( + "XLA Neuron FP32 output doesn't match CPU only output" + ) + assert np.array_equal(cpu_samples, xla_neuron_samples_bf16), ( + "XLA Neuron bf16 output doesn't match CPU only output" + ) @parameterized.expand(BEAM_SEARCH_TESTDATA) @pytest.mark.skip("Remove once generate fix (#262) has been merged.") @@ -130,9 +130,9 @@ def test_beam_search_decoding(self, model_name, use_cache, decoder_only, compile model_name=model_name, device="cpu", decoder_only=decoder_only, generation_config_update=config_update ) - assert np.array_equal( - cpu_samples, xla_neuron_samples_fp32 - ), "XLA Neuron FP32 output doesn't match CPU only output" - assert np.array_equal( - cpu_samples, xla_neuron_samples_bf16 - ), "XLA Neuron bf16 output doesn't match CPU only output" + assert np.array_equal(cpu_samples, xla_neuron_samples_fp32), ( + "XLA Neuron FP32 output doesn't match CPU only output" + ) + assert np.array_equal(cpu_samples, xla_neuron_samples_bf16), ( + "XLA Neuron bf16 output doesn't match CPU only output" + ) diff --git a/tests/test_trainers.py b/tests/test_trainers.py index 17f79248c..0d1650362 100644 --- a/tests/test_trainers.py +++ 
b/tests/test_trainers.py @@ -267,12 +267,12 @@ def test_train_and_eval_use_remote_cache(self, hub_test_with_local_cache, tmpdir # TODO: investigate that, not urgent. assert files_in_repo == last_files_in_repo, "No file should have been added to the Hub after first training." - assert ( - files_in_cache == last_files_in_cache - ), "No file should have been added to the cache after first training." - assert ( - second_training_duration < first_training_duration - ), "Second training should be faster because cached graphs can be used." + assert files_in_cache == last_files_in_cache, ( + "No file should have been added to the cache after first training." + ) + assert second_training_duration < first_training_duration, ( + "Second training should be faster because cached graphs can be used." + ) @pytest.mark.skip("Test in later release") def test_save_and_resume_from_checkpoint(self, parallel_sizes, tmpdir): diff --git a/tests/test_utils.py b/tests/test_utils.py index 2b84d7a52..4614f4944 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -53,9 +53,9 @@ def test_patch_model(): pass wav2vec2_model = Wav2Vec2Model(Wav2Vec2Config()) - assert ( - wav2vec2_model.config.layerdrop > 0 - ), "Default Wav2vec2Config layerdrop value is already 0 so the test will not check anything." + assert wav2vec2_model.config.layerdrop > 0, ( + "Default Wav2vec2Config layerdrop value is already 0 so the test will not check anything." + ) patching_specs = [] for spec in MODEL_PATCHING_SPECS: patching_specs.append((wav2vec2_model,) + spec) diff --git a/text-generation-inference/server/text_generation_server/model.py b/text-generation-inference/server/text_generation_server/model.py index c4a692c95..e8cb34ee1 100644 --- a/text-generation-inference/server/text_generation_server/model.py +++ b/text-generation-inference/server/text_generation_server/model.py @@ -56,7 +56,7 @@ def log_cache_size(): if os.path.exists(path): usage = shutil.disk_usage(path) gb = 2**30 - logger.info(f"Cache disk [{path}]: total = {usage.total/gb:.2f} G, free = {usage.free/gb:.2f} G") + logger.info(f"Cache disk [{path}]: total = {usage.total / gb:.2f} G, free = {usage.free / gb:.2f} G") else: raise ValueError(f"The cache directory ({path}) does not exist.") diff --git a/text-generation-inference/tgi_env.py b/text-generation-inference/tgi_env.py index 6855b468a..ff647c988 100755 --- a/text-generation-inference/tgi_env.py +++ b/text-generation-inference/tgi_env.py @@ -50,7 +50,7 @@ def parse_cmdline_and_set_env(argv: List[str] = None) -> argparse.Namespace: args = parser.parse_known_args(argv)[0] if not args.model_id: - raise Exception("No model id provided ! Either specify it using --model-id cmdline " "or MODEL_ID env var") + raise Exception("No model id provided! 
Either specify it using --model-id cmdline or MODEL_ID env var") # Override env with cmdline params os.environ["MODEL_ID"] = args.model_id @@ -109,7 +109,7 @@ def lookup_compatible_cached_model(model_id: str, revision: Optional[str]) -> Op if not all_compatible: logger.debug( - "No compatible cached entry found for model %s, env %s, available cores %s, " "neuronxcc version %s", + "No compatible cached entry found for model %s, env %s, available cores %s, neuronxcc version %s", model_id, get_env_dict(), available_cores, @@ -139,7 +139,7 @@ def check_env_and_neuron_config_compatibility(neuron_config: Dict[str, Any], che if check_compiler_version and neuron_config["compiler_version"] != neuronxcc_version: logger.debug( - "Compiler version conflict, the local one " "(%s) differs from the one used to compile the model (%s)", + "Compiler version conflict, the local one (%s) differs from the one used to compile the model (%s)", neuronxcc_version, neuron_config["compiler_version"], ) @@ -163,7 +163,7 @@ def check_env_and_neuron_config_compatibility(neuron_config: Dict[str, Any], che sequence_length = neuron_config["sequence_length"] if max_input_tokens >= sequence_length: logger.debug( - "Specified max input tokens is not compatible with config sequence length " "( %s >= %s)", + "Specified max input tokens is not compatible with config sequence length ( %s >= %s)", max_input_tokens, sequence_length, ) @@ -205,7 +205,7 @@ def main(): if not compatible: env_dict = get_env_dict() msg = ( - "Invalid neuron config and env. Config {}, env {}, available cores {}, " "neuronxcc version {}" + "Invalid neuron config and env. Config {}, env {}, available cores {}, neuronxcc version {}" ).format(neuron_config, env_dict, available_cores, neuronxcc_version) logger.error(msg) raise Exception(msg) @@ -213,9 +213,9 @@ def main(): neuron_config = lookup_compatible_cached_model(args.model_id, args.revision) if not neuron_config: - msg = ( - "No compatible neuron config found. Provided env {}, " "available cores {}, neuronxcc version {}" - ).format(get_env_dict(), available_cores, neuronxcc_version) + msg = ("No compatible neuron config found. Provided env {}, available cores {}, neuronxcc version {}").format( + get_env_dict(), available_cores, neuronxcc_version + ) logger.error(msg) raise Exception(msg) diff --git a/tools/auto_fill_inference_cache.py b/tools/auto_fill_inference_cache.py index faa1e6e2d..e1772d784 100644 --- a/tools/auto_fill_inference_cache.py +++ b/tools/auto_fill_inference_cache.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Script to cache models for inference.""" + import argparse import json import logging
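
For context on the recurring pattern above: nearly every hunk is a mechanical rewrite produced by `ruff format`, which wraps long assert statements differently from the previous black-style layout: the parentheses move from the condition onto the message. Below is a minimal, self-contained sketch of the two layouts; the assert bodies are copied verbatim from the run_glue.py hunk in this diff, and the two placeholder variables are added here only so the snippet runs on its own.

# Placeholder values, added for this sketch only (run_glue.py derives them from file names).
train_extension = "csv"
validation_extension = "csv"

# Old layout (pre-ruff-format): the condition is parenthesized so it can wrap across lines.
assert (
    validation_extension == train_extension
), "`validation_file` should have the same extension (csv or json) as `train_file`."

# New layout (ruff format): the condition stays inline and the message is parenthesized instead.
assert validation_extension == train_extension, (
    "`validation_file` should have the same extension (csv or json) as `train_file`."
)

To reproduce the checks locally, the Makefile targets in this diff map onto the two modes: `make style_check` runs `ruff check .` plus `ruff format . --diff` (report-only, as used by the CI workflow), while `make style` runs `ruff check . --fix` plus `ruff format .` to apply the rewrites in place.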