diff --git a/.github/workflows/check_code_quality.yml b/.github/workflows/check_code_quality.yml index fe788bc34..7410502b0 100644 --- a/.github/workflows/check_code_quality.yml +++ b/.github/workflows/check_code_quality.yml @@ -48,4 +48,5 @@ jobs: - name: Check style with ruff run: | source venv/bin/activate + ruff format . --diff ruff check . diff --git a/Makefile b/Makefile index 12febb89c..c9444e23f 100644 --- a/Makefile +++ b/Makefile @@ -60,9 +60,11 @@ transformers_examples: # Run code quality checks style_check: ruff check . + ruff format . --diff style: ruff check . --fix + ruff format . # Utilities to release to PyPi build_dist_install_tools: diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py index bedf48ec9..8b7134647 100755 --- a/examples/language-modeling/run_clm.py +++ b/examples/language-modeling/run_clm.py @@ -462,7 +462,7 @@ def main(): model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code) n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values()) - logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") + logger.info(f"Training new model from scratch - Total size={n_params / 2**20:.2f}M params") # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # on a small vocab and want a smaller embedding size, remove this test. diff --git a/examples/question-answering/trainer_qa.py b/examples/question-answering/trainer_qa.py index 8243448a0..1e1119de3 100644 --- a/examples/question-answering/trainer_qa.py +++ b/examples/question-answering/trainer_qa.py @@ -15,6 +15,7 @@ """ A subclass of `Trainer` specific to Question-Answering tasks """ + import math import time diff --git a/examples/question-answering/trainer_seq2seq_qa.py b/examples/question-answering/trainer_seq2seq_qa.py index 6e04bf3f6..2a3dbe5ca 100644 --- a/examples/question-answering/trainer_seq2seq_qa.py +++ b/examples/question-answering/trainer_seq2seq_qa.py @@ -15,6 +15,7 @@ """ A subclass of `Trainer` specific to Question-Answering tasks """ + import math import time from typing import Dict, List, Optional diff --git a/examples/question-answering/utils_qa.py b/examples/question-answering/utils_qa.py index 23a46370d..79497dbb8 100644 --- a/examples/question-answering/utils_qa.py +++ b/examples/question-answering/utils_qa.py @@ -15,6 +15,7 @@ """ Post-processing utilities for question answering. """ + import collections import json import logging diff --git a/examples/summarization/run_summarization.py b/examples/summarization/run_summarization.py index 5a442c075..90be3c604 100755 --- a/examples/summarization/run_summarization.py +++ b/examples/summarization/run_summarization.py @@ -525,9 +525,9 @@ def main(): return if isinstance(tokenizer, tuple(MULTILINGUAL_TOKENIZERS)): - assert ( - data_args.lang is not None - ), f"{tokenizer.__class__.__name__} is a multilingual tokenizer which requires --lang argument" + assert data_args.lang is not None, ( + f"{tokenizer.__class__.__name__} is a multilingual tokenizer which requires --lang argument" + ) tokenizer.src_lang = data_args.lang tokenizer.tgt_lang = data_args.lang diff --git a/examples/text-classification/run_glue.py b/examples/text-classification/run_glue.py index 75b321be0..e9f1fb6f0 100755 --- a/examples/text-classification/run_glue.py +++ b/examples/text-classification/run_glue.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning the library models for sequence classification on GLUE.""" +"""Finetuning the library models for sequence classification on GLUE.""" # You can also adapt this script on your own text classification task. Pointers for this are left as comments. import logging @@ -158,9 +158,9 @@ def __post_init__(self): train_extension = self.train_file.split(".")[-1] assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file." validation_extension = self.validation_file.split(".")[-1] - assert ( - validation_extension == train_extension - ), "`validation_file` should have the same extension (csv or json) as `train_file`." + assert validation_extension == train_extension, ( + "`validation_file` should have the same extension (csv or json) as `train_file`." + ) @dataclass @@ -329,9 +329,9 @@ def main(): if data_args.test_file is not None: train_extension = data_args.train_file.split(".")[-1] test_extension = data_args.test_file.split(".")[-1] - assert ( - test_extension == train_extension - ), "`test_file` should have the same extension (csv or json) as `train_file`." + assert test_extension == train_extension, ( + "`test_file` should have the same extension (csv or json) as `train_file`." + ) data_files["test"] = data_args.test_file else: raise ValueError("Need either a GLUE task or a test file for `do_predict`.") diff --git a/examples/text-classification/run_xnli.py b/examples/text-classification/run_xnli.py index 4b06d2653..23f79a5bb 100755 --- a/examples/text-classification/run_xnli.py +++ b/examples/text-classification/run_xnli.py @@ -14,8 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning multi-lingual models on XNLI (e.g. Bert, DistilBERT, XLM). - Adapted from `examples/text-classification/run_glue.py`""" +"""Finetuning multi-lingual models on XNLI (e.g. Bert, DistilBERT, XLM). +Adapted from `examples/text-classification/run_glue.py`""" import logging import os diff --git a/optimum/exporters/neuron/__main__.py b/optimum/exporters/neuron/__main__.py index 3e2d00ef6..5a549a697 100644 --- a/optimum/exporters/neuron/__main__.py +++ b/optimum/exporters/neuron/__main__.py @@ -662,7 +662,7 @@ def main_export( ) logger.info( - f"The {NEURON_COMPILER} export succeeded and the exported model was saved at: " f"{output.as_posix()}" + f"The {NEURON_COMPILER} export succeeded and the exported model was saved at: {output.as_posix()}" ) except ShapeError as e: raise e @@ -678,8 +678,7 @@ def main_export( ) except Exception as e: logger.error( - f"An error occured with the error message: {e}.\n The exported model was saved at: " - f"{output.as_posix()}" + f"An error occurred with the error message: {e}.\n The exported model was saved at: {output.as_posix()}" ) diff --git a/optimum/exporters/neuron/config.py b/optimum/exporters/neuron/config.py index 82e842954..80958f70b 100644 --- a/optimum/exporters/neuron/config.py +++ b/optimum/exporters/neuron/config.py @@ -16,6 +16,7 @@ Common Neuron configuration classes that handle most of the features for building model specific configurations.
""" + from typing import List from ...utils import ( diff --git a/optimum/exporters/neuron/convert.py b/optimum/exporters/neuron/convert.py index b9b1a79c5..a30a7ba47 100644 --- a/optimum/exporters/neuron/convert.py +++ b/optimum/exporters/neuron/convert.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Neuron compiled model check and export functions.""" + import copy import time from collections import OrderedDict @@ -282,7 +283,7 @@ def validate_model_outputs( if shape_failures: msg = "\n".join(f"- {t[0]}: got {t[1]} (reference) and {t[2]} (neuron)" for t in shape_failures) - raise ShapeError("Output shapes do not match between reference model and the Neuron exported model:\n" "{msg}") + raise ShapeError("Output shapes do not match between reference model and the Neuron exported model:\n{msg}") if value_failures: msg = "\n".join(f"- {t[0]}: max diff = {t[1]}" for t in value_failures) diff --git a/optimum/exporters/neuron/model_configs/decoder_configs.py b/optimum/exporters/neuron/model_configs/decoder_configs.py index 30ddc808e..e2273610f 100644 --- a/optimum/exporters/neuron/model_configs/decoder_configs.py +++ b/optimum/exporters/neuron/model_configs/decoder_configs.py @@ -14,7 +14,6 @@ # limitations under the License. """Neuron export configurations for models using transformers_neuronx.""" - from optimum.exporters.tasks import TasksManager from ....neuron.models.granite.model import GraniteForSampling diff --git a/optimum/exporters/neuron/model_wrappers.py b/optimum/exporters/neuron/model_wrappers.py index f1fb2995e..2e1c15639 100644 --- a/optimum/exporters/neuron/model_wrappers.py +++ b/optimum/exporters/neuron/model_wrappers.py @@ -247,13 +247,13 @@ def forward(self, input_ids, attention_mask): batch_size = input_ids.shape[0] sequence_length = input_ids.shape[1] if self.sequence_length is not None: - assert ( - self.sequence_length - ), f"Different sequence length for the parallel partition({self.sequence_length}) and for dummy inputs({sequence_length}). Make sure that they have the same value." + assert self.sequence_length, ( + f"Different sequence length for the parallel partition({self.sequence_length}) and for dummy inputs({sequence_length}). Make sure that they have the same value." + ) if self.batch_size is not None: - assert ( - self.batch_size - ), f"Different batch size for the parallel partition({self.batch_size}) and for dummy inputs({batch_size}). Make sure that they have the same value." + assert self.batch_size, ( + f"Different batch size for the parallel partition({self.batch_size}) and for dummy inputs({batch_size}). Make sure that they have the same value." + ) encoder_output = self.model.encoder( input_ids=input_ids, attention_mask=attention_mask, output_attentions=False, output_hidden_states=False diff --git a/optimum/neuron/accelerate/utils/operations.py b/optimum/neuron/accelerate/utils/operations.py index 11345ca10..9e241dd6e 100644 --- a/optimum/neuron/accelerate/utils/operations.py +++ b/optimum/neuron/accelerate/utils/operations.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Custom operations related to accelerate for Neuron.""" - import torch from accelerate.utils.operations import recursively_apply diff --git a/optimum/neuron/distributed/encoder_decoder_models.py b/optimum/neuron/distributed/encoder_decoder_models.py index 0af70494c..b86df32a8 100644 --- a/optimum/neuron/distributed/encoder_decoder_models.py +++ b/optimum/neuron/distributed/encoder_decoder_models.py @@ -268,7 +268,7 @@ def project(hidden_states, proj_layer, key_value_states, past_key_value): if past_key_value is not None: if len(past_key_value) != 2: raise ValueError( - f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states" + f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states" ) real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length diff --git a/optimum/neuron/distributed/parallel_layers.py b/optimum/neuron/distributed/parallel_layers.py index bbba16c88..fd489e4eb 100644 --- a/optimum/neuron/distributed/parallel_layers.py +++ b/optimum/neuron/distributed/parallel_layers.py @@ -108,7 +108,7 @@ def prepare_parallel_layer_specific_kwargs(cls, **parallel_layer_specific_kwargs name for name in parallel_layer_specific_kwargs if name not in default_parallel_layer_specific_kwargs ] logger.debug( - f'The following arguments are not allowed for {cls.__name__}: {", ".join(wrong_argument_names)}, they ' + f"The following arguments are not allowed for {cls.__name__}: {', '.join(wrong_argument_names)}, they " "will be ignored." ) diff --git a/optimum/neuron/distributed/parallelizers_manager.py b/optimum/neuron/distributed/parallelizers_manager.py index 5f3cc2df3..a0a4fce89 100644 --- a/optimum/neuron/distributed/parallelizers_manager.py +++ b/optimum/neuron/distributed/parallelizers_manager.py @@ -28,7 +28,7 @@ def parallelizer_classes_resolver( - model_type_to_parallelizer_class_name: Dict[str, str] + model_type_to_parallelizer_class_name: Dict[str, str], ) -> Dict[str, Type[Parallelizer]]: modules = [] for module_name in _PARALLELIZER_CLASSES_MODULE_NAMES: diff --git a/optimum/neuron/modeling.py b/optimum/neuron/modeling.py index fa5681d26..7edbf1c61 100644 --- a/optimum/neuron/modeling.py +++ b/optimum/neuron/modeling.py @@ -179,9 +179,7 @@ def forward( # last_hidden_state -> (batch_size, sequencen_len, hidden_size) last_hidden_state = self.remove_padding( [outputs[0]], dims=[0, 1], indices=[input_ids.shape[0], input_ids.shape[1]] - )[ - 0 - ] # Remove padding on batch_size(0), and sequence_length(1) + )[0] # Remove padding on batch_size(0), and sequence_length(1) if len(outputs) > 1: # pooler_output -> (batch_size, hidden_size) pooler_output = self.remove_padding([outputs[1]], dims=[0], indices=[input_ids.shape[0]])[ @@ -264,9 +262,7 @@ def forward( # token_embeddings -> (batch_size, sequencen_len, hidden_size) token_embeddings = self.remove_padding( [outputs[0]], dims=[0, 1], indices=[input_ids.shape[0], input_ids.shape[1]] - )[ - 0 - ] # Remove padding on batch_size(0), and sequence_length(1) + )[0] # Remove padding on batch_size(0), and sequence_length(1) # sentence_embedding -> (batch_size, hidden_size) sentence_embedding = self.remove_padding([outputs[1]], dims=[0], indices=[input_ids.shape[0]])[ 0 diff --git a/optimum/neuron/modeling_diffusion.py b/optimum/neuron/modeling_diffusion.py index 53f2df76e..944d76ebe 100644 --- a/optimum/neuron/modeling_diffusion.py +++ b/optimum/neuron/modeling_diffusion.py @@ -1183,7 +1183,9 @@ def forward( if output_hidden_states: assert 
( self.config.output_hidden_states or self.config.neuron.get("output_hidden_states") - ) == output_hidden_states, "output_hidden_states is expected to be False since the model was compiled without hidden_states as output." + ) == output_hidden_states, ( + "output_hidden_states is expected to be False since the model was compiled without hidden_states as output." + ) input_ids = input_ids.to(torch.long) # dummy generator uses long int for tracing inputs = (input_ids,) diff --git a/optimum/neuron/models/granite/hlo.py b/optimum/neuron/models/granite/hlo.py index d66f12b8d..59330b438 100644 --- a/optimum/neuron/models/granite/hlo.py +++ b/optimum/neuron/models/granite/hlo.py @@ -35,7 +35,6 @@ def scale_mul(t, scale): class GraniteForSamplingNoEmbeddingHlo: - def __init__(self, config: GraniteConfig, neuron_config: Optional[NeuronConfig] = None): self.config = config self.neuron_config = neuron_config @@ -324,9 +323,9 @@ def layer( tp_degree=self.config.tp_degree, ) if self.neuron_config.fuse_mlp: - assert all( - (not (x) for x in [in0_weight, in1_weight, out_weight, in0_scales, in1_scales, out_scales]) - ), "in0, in1 and out weights have to be None" + assert all((not (x) for x in [in0_weight, in1_weight, out_weight, in0_scales, in1_scales, out_scales])), ( + "in0, in1 and out weights have to be None" + ) in0_weight, in0_scales = mlp_in_weight, mlp_in_scales out_weight, out_scales = mlp_out_weight, mlp_out_scales @@ -688,7 +687,6 @@ def attention( # Single Token Generation ("Prefetch"-style) ans speculative forward if active_mask is not None: - n_active_tokens = key.sizes[1] if bsh_cache_layout else key.sizes[0] if n_active_tokens > 1 and self.neuron_config and self.neuron_config.continuous_batching: # For speculative forward + continuous batching, slice out samples in the batch size diff --git a/optimum/neuron/models/granite/model.py b/optimum/neuron/models/granite/model.py index ddd3aecf2..7b706ef19 100644 --- a/optimum/neuron/models/granite/model.py +++ b/optimum/neuron/models/granite/model.py @@ -159,9 +159,9 @@ def load_weights(self): # Note: Automatic MLP padding is safe since zeros are *only* introduced to intermediary state if self.neuron_config.fuse_mlp: - assert all( - getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"] - ), "fuse_mlp need to have gate and up proj weights" + assert all(getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"]), ( + "fuse_mlp needs to have gate and up proj weights" + ) assert all( getattr(mlp, attr, None).weight.shape[0] % self.config.tp_degree == 0 for attr in ["gate_proj", "up_proj"] diff --git a/optimum/neuron/models/granite/modules.py b/optimum/neuron/models/granite/modules.py index 4cbbcc9f3..84b36a38d 100644 --- a/optimum/neuron/models/granite/modules.py +++ b/optimum/neuron/models/granite/modules.py @@ -18,7 +18,6 @@ class GraniteForCausalLM(module.PretrainedModel): - def __init__(self, config: GraniteConfig): super().__init__() dtype, _, _ = utils.parse_amp(config.amp) @@ -34,7 +33,6 @@ def get_base_model(self): class GraniteModel(module.LowMemoryModule): - def __init__(self, config: GraniteConfig): super().__init__() self.embed_tokens = module.LowMemoryEmbedding(config.vocab_size, config.hidden_size) @@ -45,14 +43,12 @@ def __init__(self, config: GraniteConfig): class GraniteRMSNorm(module.LowMemoryModule): - def __init__(self, config: GraniteConfig) -> None: super().__init__() self.weight = module.UninitializedParameter() class GraniteDecoderLayer(module.LowMemoryModule): - def __init__(self, config: GraniteConfig):
super().__init__() self.self_attn = GraniteAttention(config) @@ -62,7 +58,6 @@ def __init__(self, config: GraniteConfig): class GraniteAttention(module.LowMemoryModule): - def __init__(self, config: GraniteConfig): super().__init__() self.hidden_size = config.hidden_size @@ -77,7 +72,6 @@ def __init__(self, config: GraniteConfig): class GraniteMLP(module.LowMemoryModule): - def __init__(self, config: GraniteConfig): super().__init__() dtype, _, _ = utils.parse_amp(config.amp) diff --git a/optimum/neuron/models/qwen2/model.py b/optimum/neuron/models/qwen2/model.py index 8ee60d9b4..8396a8fba 100644 --- a/optimum/neuron/models/qwen2/model.py +++ b/optimum/neuron/models/qwen2/model.py @@ -156,9 +156,9 @@ def load_weights(self): # Note: Automatic MLP padding is safe since zeros are *only* introduced to intermediary state if self.neuron_config.fuse_mlp: - assert all( - getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"] - ), "fuse_mlp need to have gate and up proj weights" + assert all(getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"]), ( + "fuse_mlp needs to have gate and up proj weights" + ) assert all( getattr(mlp, attr, None).weight.shape[0] % self.config.tp_degree == 0 for attr in ["gate_proj", "up_proj"] diff --git a/optimum/neuron/utils/cache_utils.py b/optimum/neuron/utils/cache_utils.py index 28845713c..84a760988 100644 --- a/optimum/neuron/utils/cache_utils.py +++ b/optimum/neuron/utils/cache_utils.py @@ -62,7 +62,7 @@ def load_custom_cache_repo_name_from_hf_home( - hf_home_cache_repo_file: Union[str, Path] = HF_HOME_CACHE_REPO_FILE + hf_home_cache_repo_file: Union[str, Path] = HF_HOME_CACHE_REPO_FILE, ) -> Optional[str]: if Path(hf_home_cache_repo_file).exists(): with open(hf_home_cache_repo_file, "r") as fp: diff --git a/optimum/neuron/utils/peft_utils.py b/optimum/neuron/utils/peft_utils.py index 7780ff7ed..4866669ac 100644 --- a/optimum/neuron/utils/peft_utils.py +++ b/optimum/neuron/utils/peft_utils.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Utilities related to the PEFT library and support.""" + import collections import functools import os diff --git a/optimum/neuron/utils/version_utils.py b/optimum/neuron/utils/version_utils.py index 818e2bc1e..368c4d186 100644 --- a/optimum/neuron/utils/version_utils.py +++ b/optimum/neuron/utils/version_utils.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Version utilities.""" + from typing import Optional from packaging import version diff --git a/pyproject.toml b/pyproject.toml index 242fb9579..b7f0fb60d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,9 @@ ignore = ["C901", "E501", "E741", "W605"] select = ["C", "E", "F", "I", "W"] exclude = ["*.ipynb"] +[tool.ruff.format] +exclude = ["*.ipynb"] + # Ignore import violations in all `__init__.py` files.
[tool.ruff.lint.per-file-ignores] "__init__.py" = ["E402", "F401", "F403", "F811"] diff --git a/tests/cli/test_neuron_cache_cli.py b/tests/cli/test_neuron_cache_cli.py index 0d7887445..c8b6da574 100644 --- a/tests/cli/test_neuron_cache_cli.py +++ b/tests/cli/test_neuron_cache_cli.py @@ -50,18 +50,18 @@ def _optimum_neuron_cache_create(self, cache_repo_id: Optional[str] = None, publ try: repo_id = cache_repo_id if cache_repo_id is not None else CACHE_REPO_NAME info = HfApi().repo_info(repo_id, repo_type="model") - assert info.private == ( - not public - ), "The privacy of the repo should match the presence of the --public flag." + assert info.private == (not public), ( + "The privacy of the repo should match the presence of the --public flag." + ) except RepositoryNotFoundError: pytest.fail("The repo was not created.") finally: delete_repo(repo_id) - assert ( - repo_id == load_custom_cache_repo_name_from_hf_home() - ), f"Saved local Neuron cache name should be equal to {repo_id}." + assert repo_id == load_custom_cache_repo_name_from_hf_home(), ( + f"Saved local Neuron cache name should be equal to {repo_id}." + ) def test_optimum_neuron_cache_create_with_custom_name(self, hub_test): seed = random.randint(0, 100) @@ -79,9 +79,9 @@ def test_optimum_neuron_cache_set(self, hub_test): p = subprocess.Popen(command) returncode = p.wait() assert returncode == 0 - assert ( - repo_id == load_custom_cache_repo_name_from_hf_home() - ), f"Saved local Neuron cache name should be equal to {repo_id}." + assert repo_id == load_custom_cache_repo_name_from_hf_home(), ( + f"Saved local Neuron cache name should be equal to {repo_id}." + ) def test_optimum_neuron_cache_add(self, hub_test): with TemporaryDirectory() as tmpdir: diff --git a/tests/test_generate.py b/tests/test_generate.py index 706e3538b..a4b054240 100644 --- a/tests/test_generate.py +++ b/tests/test_generate.py @@ -104,12 +104,12 @@ def test_greedy_decoding(self, model_name, use_cache, decoder_only, compiler_fla cpu_samples = _test_generative_decoding(model_name=model_name, device="cpu", decoder_only=decoder_only) - assert np.array_equal( - cpu_samples, xla_neuron_samples_fp32 - ), "XLA Neuron FP32 output doesn't match CPU only output" - assert np.array_equal( - cpu_samples, xla_neuron_samples_bf16 - ), "XLA Neuron bf16 output doesn't match CPU only output" + assert np.array_equal(cpu_samples, xla_neuron_samples_fp32), ( + "XLA Neuron FP32 output doesn't match CPU only output" + ) + assert np.array_equal(cpu_samples, xla_neuron_samples_bf16), ( + "XLA Neuron bf16 output doesn't match CPU only output" + ) @parameterized.expand(BEAM_SEARCH_TESTDATA) @pytest.mark.skip("Remove once generate fix (#262) has been merged.") @@ -130,9 +130,9 @@ def test_beam_search_decoding(self, model_name, use_cache, decoder_only, compile model_name=model_name, device="cpu", decoder_only=decoder_only, generation_config_update=config_update ) - assert np.array_equal( - cpu_samples, xla_neuron_samples_fp32 - ), "XLA Neuron FP32 output doesn't match CPU only output" - assert np.array_equal( - cpu_samples, xla_neuron_samples_bf16 - ), "XLA Neuron bf16 output doesn't match CPU only output" + assert np.array_equal(cpu_samples, xla_neuron_samples_fp32), ( + "XLA Neuron FP32 output doesn't match CPU only output" + ) + assert np.array_equal(cpu_samples, xla_neuron_samples_bf16), ( + "XLA Neuron bf16 output doesn't match CPU only output" + ) diff --git a/tests/test_trainers.py b/tests/test_trainers.py index 17f79248c..0d1650362 100644 --- a/tests/test_trainers.py +++ 
b/tests/test_trainers.py @@ -267,12 +267,12 @@ def test_train_and_eval_use_remote_cache(self, hub_test_with_local_cache, tmpdir # TODO: investigate that, not urgent. assert files_in_repo == last_files_in_repo, "No file should have been added to the Hub after first training." - assert ( - files_in_cache == last_files_in_cache - ), "No file should have been added to the cache after first training." - assert ( - second_training_duration < first_training_duration - ), "Second training should be faster because cached graphs can be used." + assert files_in_cache == last_files_in_cache, ( + "No file should have been added to the cache after first training." + ) + assert second_training_duration < first_training_duration, ( + "Second training should be faster because cached graphs can be used." + ) @pytest.mark.skip("Test in later release") def test_save_and_resume_from_checkpoint(self, parallel_sizes, tmpdir): diff --git a/tests/test_utils.py b/tests/test_utils.py index 2b84d7a52..4614f4944 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -53,9 +53,9 @@ def test_patch_model(): pass wav2vec2_model = Wav2Vec2Model(Wav2Vec2Config()) - assert ( - wav2vec2_model.config.layerdrop > 0 - ), "Default Wav2vec2Config layerdrop value is already 0 so the test will not check anything." + assert wav2vec2_model.config.layerdrop > 0, ( + "Default Wav2vec2Config layerdrop value is already 0 so the test will not check anything." + ) patching_specs = [] for spec in MODEL_PATCHING_SPECS: patching_specs.append((wav2vec2_model,) + spec) diff --git a/text-generation-inference/server/text_generation_server/model.py b/text-generation-inference/server/text_generation_server/model.py index c4a692c95..e8cb34ee1 100644 --- a/text-generation-inference/server/text_generation_server/model.py +++ b/text-generation-inference/server/text_generation_server/model.py @@ -56,7 +56,7 @@ def log_cache_size(): if os.path.exists(path): usage = shutil.disk_usage(path) gb = 2**30 - logger.info(f"Cache disk [{path}]: total = {usage.total/gb:.2f} G, free = {usage.free/gb:.2f} G") + logger.info(f"Cache disk [{path}]: total = {usage.total / gb:.2f} G, free = {usage.free / gb:.2f} G") else: raise ValueError(f"The cache directory ({path}) does not exist.") diff --git a/text-generation-inference/tgi_env.py b/text-generation-inference/tgi_env.py index 6855b468a..ff647c988 100755 --- a/text-generation-inference/tgi_env.py +++ b/text-generation-inference/tgi_env.py @@ -50,7 +50,7 @@ def parse_cmdline_and_set_env(argv: List[str] = None) -> argparse.Namespace: args = parser.parse_known_args(argv)[0] if not args.model_id: - raise Exception("No model id provided ! Either specify it using --model-id cmdline " "or MODEL_ID env var") + raise Exception("No model id provided! 
Either specify it using --model-id cmdline or MODEL_ID env var") # Override env with cmdline params os.environ["MODEL_ID"] = args.model_id @@ -109,7 +109,7 @@ def lookup_compatible_cached_model(model_id: str, revision: Optional[str]) -> Op if not all_compatible: logger.debug( - "No compatible cached entry found for model %s, env %s, available cores %s, " "neuronxcc version %s", + "No compatible cached entry found for model %s, env %s, available cores %s, neuronxcc version %s", model_id, get_env_dict(), available_cores, @@ -139,7 +139,7 @@ def check_env_and_neuron_config_compatibility(neuron_config: Dict[str, Any], che if check_compiler_version and neuron_config["compiler_version"] != neuronxcc_version: logger.debug( - "Compiler version conflict, the local one " "(%s) differs from the one used to compile the model (%s)", + "Compiler version conflict, the local one (%s) differs from the one used to compile the model (%s)", neuronxcc_version, neuron_config["compiler_version"], ) @@ -163,7 +163,7 @@ def check_env_and_neuron_config_compatibility(neuron_config: Dict[str, Any], che sequence_length = neuron_config["sequence_length"] if max_input_tokens >= sequence_length: logger.debug( - "Specified max input tokens is not compatible with config sequence length " "( %s >= %s)", + "Specified max input tokens is not compatible with config sequence length ( %s >= %s)", max_input_tokens, sequence_length, ) @@ -205,7 +205,7 @@ def main(): if not compatible: env_dict = get_env_dict() msg = ( - "Invalid neuron config and env. Config {}, env {}, available cores {}, " "neuronxcc version {}" + "Invalid neuron config and env. Config {}, env {}, available cores {}, neuronxcc version {}" ).format(neuron_config, env_dict, available_cores, neuronxcc_version) logger.error(msg) raise Exception(msg) @@ -213,9 +213,9 @@ def main(): neuron_config = lookup_compatible_cached_model(args.model_id, args.revision) if not neuron_config: - msg = ( - "No compatible neuron config found. Provided env {}, " "available cores {}, neuronxcc version {}" - ).format(get_env_dict(), available_cores, neuronxcc_version) + msg = ("No compatible neuron config found. Provided env {}, available cores {}, neuronxcc version {}").format( + get_env_dict(), available_cores, neuronxcc_version + ) logger.error(msg) raise Exception(msg) diff --git a/tools/auto_fill_inference_cache.py b/tools/auto_fill_inference_cache.py index faa1e6e2d..e1772d784 100644 --- a/tools/auto_fill_inference_cache.py +++ b/tools/auto_fill_inference_cache.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Script to cache models for inference.""" + import argparse import json import logging
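
For context on the recurring pattern above: nearly every hunk is a mechanical rewrite produced by `ruff format`, which wraps long assert statements differently from the previous black-style layout: the parentheses move from the condition onto the message. Below is a minimal, self-contained sketch of the two layouts; the assert bodies are copied verbatim from the run_glue.py hunk in this diff, and the two placeholder variables are added here only so the snippet runs on its own.

# Placeholder values, added for this sketch only (run_glue.py derives them from file names).
train_extension = "csv"
validation_extension = "csv"

# Old layout (pre-ruff-format): the condition is parenthesized so it can wrap across lines.
assert (
    validation_extension == train_extension
), "`validation_file` should have the same extension (csv or json) as `train_file`."

# New layout (ruff format): the condition stays inline and the message is parenthesized instead.
assert validation_extension == train_extension, (
    "`validation_file` should have the same extension (csv or json) as `train_file`."
)

To reproduce the checks locally, the Makefile targets in this diff map onto the two modes: `make style_check` runs `ruff check .` plus `ruff format . --diff` (report-only, as used by the CI workflow), while `make style` runs `ruff check . --fix` plus `ruff format .` to apply the rewrites in place.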