```python
# `model` is assumed to have been loaded earlier in this example
output = model.generate(texts=["Why LLM models are becoming so important?"])
print("Generated output by the model: {}".format(output))
```
You can find the data folder [here](examples/models/llama/alpaca_data).
<br>
## 🌟 What's new?
We are excited to announce the latest enhancements to our `xTuring` library:
1. __`LLaMA 2` integration__ - You can use and fine-tune the _`LLaMA 2`_ model in different configurations: _off-the-shelf_, _off-the-shelf with INT8 precision_, _LoRA fine-tuning_, _LoRA fine-tuning with INT8 precision_ and _LoRA fine-tuning with INT4 precision_ using the `GenericModel` wrapper, and/or you can use the `Llama2` class from `xturing.models` to test and fine-tune the model, as in the example and the variant-loading sketch below.
```python
from xturing.models import BaseModel
model = BaseModel.create('llama2')
```
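A minimal sketch of loading the other configurations (the variant keys below, such as `llama2_lora` and `llama2_lora_int8`, are assumptions based on the library's naming pattern; check the supported-models list for the exact keys):

```python
from xturing.models import BaseModel

# LoRA fine-tunable version (key assumed)
lora_model = BaseModel.create('llama2_lora')

# LoRA with INT8 precision (key assumed)
lora_int8_model = BaseModel.create('llama2_lora_int8')
```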
2. __`Evaluation`__ - You can now evaluate any `Causal Language Model` on any dataset. The only metric currently supported is [`perplexity`](https://en.wikipedia.org/wiki/Perplexity).
```python
# Make the necessary imports
from xturing.datasets import InstructionDataset
from xturing.models import BaseModel

# Load a dataset and a model (path and name are illustrative), then evaluate
dataset = InstructionDataset('./alpaca_data')
model = BaseModel.create('gpt2')
result = model.evaluate(dataset)
```
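The returned `result` should hold the computed perplexity; lower values mean the model assigns higher likelihood to the dataset.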
**docs/docs/advanced/api_server.md** (1 addition, 1 deletion)
description: FastAPI inference server
sidebar_position: 3
---
# ⚡️ Running model inference with FastAPI Server
<!-- Once you have fine-tuned your model, you can run the inference using a FastAPI server. -->
After successfully fine-tuning your model, you can perform inference using a FastAPI server. The following steps guide you through launching and utilizing the API server for your fine-tuned model.
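A minimal sketch of querying such a server from Python (the launch command, port, endpoint path, and payload shape below are all assumptions for illustration; match them to the routes your server actually exposes):

```python
# Launch the server first, e.g. via the CLI (command is an assumption):
#   xturing api -m "/path/to/the/fine-tuned/model"
import requests

# POST a prompt to the (assumed) inference endpoint
response = requests.post(
    "http://localhost:5000/api",
    json={"prompt": ["Why are LLM models becoming so important?"]},
)
print(response.json())
```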
**docs/docs/overview/supported_models.md** (1 addition, 0 deletions)
### INT4 Precision model versions
> In order to load any model's __`INT4+LoRA`__ version, you will need to make use of the `GenericLoraKbitModel` class from `xturing.models`. Below is how to use it:
```python
from xturing.models import GenericLoraKbitModel
model = GenericLoraKbitModel('/path/to/model')
```
The `/path/to/model` can be replaced with your local directory or any HuggingFace library model such as `facebook/opt-1.3b`.
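For context, here is a sketch of fine-tuning such a model end to end (the dataset path is illustrative, and `finetune`/`generate` are assumed to work on `GenericLoraKbitModel` as they do on the library's other model classes):

```python
from xturing.datasets import InstructionDataset
from xturing.models import GenericLoraKbitModel

# Dataset path and base model are illustrative
dataset = InstructionDataset('./alpaca_data')
model = GenericLoraKbitModel('facebook/opt-1.3b')

# Fine-tune with LoRA in INT4 precision, then generate from the tuned model
model.finetune(dataset=dataset)
output = model.generate(texts=["Why are LLM models becoming so important?"])
```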