Add support for Mistral models (#411)
* feat(text-generation): add support for Mistral models

* doc: add mistral to supported architectures

* test: use correct tiny mistral model
dacorvo authored Jan 16, 2024
1 parent 104bd64 commit 43d2f90
Showing 3 changed files with 8 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/source/package_reference/export.mdx
@@ -66,6 +66,7 @@ Since many architectures share similar properties for their Neuron configuration
| FlauBERT | feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification |
| GPT2 | text-generation |
| Llama, Llama 2 | text-generation |
+| Mistral | text-generation |
| MobileBERT | feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification |
| MPNet | feature-extraction, fill-mask, multiple-choice, question-answering, text-classification, token-classification |
| OPT | text-generation |
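With this table entry in place, a Mistral checkpoint can in principle be compiled and run like the other supported decoder architectures. The following is a hedged sketch: the checkpoint name and the compilation arguments (batch_size, sequence_length, num_cores, auto_cast_type) are illustrative assumptions rather than values prescribed by this commit, and running it requires an Inferentia/Trainium instance.

# Sketch: export and run a Mistral model with optimum-neuron.
# Checkpoint and compilation arguments below are illustrative, not from this commit.
from transformers import AutoTokenizer
from optimum.neuron import NeuronModelForCausalLM

model_id = "mistralai/Mistral-7B-v0.1"

# export=True triggers Neuron compilation on first load; this can take a while.
model = NeuronModelForCausalLM.from_pretrained(
    model_id,
    export=True,
    batch_size=1,
    sequence_length=2048,
    num_cores=2,
    auto_cast_type="bf16",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

inputs = tokenizer("What is AWS Neuron?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))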
5 changes: 5 additions & 0 deletions optimum/exporters/neuron/model_configs.py
@@ -471,3 +471,8 @@ def generate_io_aliases(self, model):
            aliases[model.past_key_values_ca[i]] = len(model.past_key_values_sa) + i + num_outputs_from_trace

        return aliases
+
+
+@register_in_tasks_manager("mistral", "text-generation")
+class MistralNeuronConfig(TextNeuronDecoderConfig):
+    NEURONX_CLASS = "mistral.model.MistralForSampling"
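The new config follows the same registration pattern as the other decoders: register the architecture name for the text-generation task and point NEURONX_CLASS at the corresponding transformers-neuronx sampling class. As a hedged illustration of that pattern only, a hypothetical architecture would be wired in like this (all names below are placeholders, not part of this commit or of transformers-neuronx):

# Hypothetical example of the registration pattern; "myarch" and
# "myarch.model.MyArchForSampling" are placeholders, not real classes.
@register_in_tasks_manager("myarch", "text-generation")
class MyArchNeuronConfig(TextNeuronDecoderConfig):
    NEURONX_CLASS = "myarch.model.MyArchForSampling"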
3 changes: 2 additions & 1 deletion tests/generation/conftest.py
@@ -22,11 +22,12 @@
from optimum.utils.testing_utils import USER


-DECODER_MODEL_ARCHITECTURES = ["bloom", "gpt2", "llama", "opt"]
+DECODER_MODEL_ARCHITECTURES = ["bloom", "gpt2", "llama", "mistral", "opt"]
DECODER_MODEL_NAMES = {
    "bloom": "hf-internal-testing/tiny-random-BloomForCausalLM",
    "gpt2": "hf-internal-testing/tiny-random-gpt2",
    "llama": "dacorvo/tiny-random-llama",
+    "mistral": "dacorvo/tiny-random-MistralForCausalLM",
    "opt": "hf-internal-testing/tiny-random-OPTForCausalLM",
}
SEQ2SEQ_MODEL_NAMES = {
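The generation tests presumably consume these mappings through a parametrized fixture, so that each tiny checkpoint (now including the tiny Mistral model) is exercised in turn. A minimal sketch of such a fixture, assuming pytest and the two dictionaries above; the actual fixture in conftest.py may differ in name and scope:

import pytest

# Sketch only: parametrize over the decoder architectures and resolve each
# one to its tiny test checkpoint.
@pytest.fixture(scope="module", params=DECODER_MODEL_ARCHITECTURES)
def decoder_model_id(request):
    return DECODER_MODEL_NAMES[request.param]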
