pipeline defined

abdouaziz · Mar 18, 2022 · ab91d89 · ab91d89
1 parent 6329d63
commit ab91d89
Show file tree

Hide file tree

Showing 5 changed files with 114 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -65,10 +65,64 @@ audio_file = "audio.wav"
 prediction = asr.predict(audio_file)
 ```
 
+# Pipeline
+
+Pipelines are an easy way to use models for inference. They are objects that abstract most of the complex code from the library, offering a simple API dedicated to several tasks, such as Masked Language Modeling and Sentiment Analysis.
+
+
+
+
+**bert-base-wolof** is a BERT-base model pretrained on the Wolof language.
+**sora-wolof** is a RoBERTa model pretrained on the Wolof language.
+
+## Models in Wolof library
+
+| Model name | Number of layers | Attention Heads | Embedding Dimension | Total Parameters |
+| :------:       |   :---: | :---: | :---: | :---: |
+| `bert-base-wolof` | 6    | 12   | 514   | 56.9 M |
+| `soraberta-base` | 6    | 12   | 514   | 83 M |
+	 
+
+## Using Soraberta or BERT-base-wolof
+
+```python
+>>> from wolof import Pipeline
+>>> unmasker = Pipeline(task='fill-mask', model_name='abdouaziiz/bert-base-wolof')
+>>> unmasker("kuy yoot du [MASK].")
+
+[{'sequence': '[CLS] kuy yoot du seqet. [SEP]',
+	'score': 0.09505125880241394,
+	'token': 13578},
+	{'sequence': '[CLS] kuy yoot du daw. [SEP]',
+	'score': 0.08882280439138412,
+	'token': 679},
+	{'sequence': '[CLS] kuy yoot du yoot. [SEP]',
+	'score': 0.057790059596300125,
+	'token': 5117},
+	{'sequence': '[CLS] kuy yoot du seqat. [SEP]',
+	'score': 0.05671025067567825,
+	'token': 4992},
+	{'sequence': '[CLS] kuy yoot du yaqu. [SEP]',
+	'score': 0.0469999685883522,
+	'token': 1735}]
+```
+
+
+For ***`task`***, the following values are supported: `'fill-mask'` and `'sentiment-analysis'`.
+
+
+
+
+
+
 You can checkout examples in `examples/`
 
 <hr>
 
+
+
+
+
 ## Author
 - Abdou Aziz DIOP @abdouaziz
 - email : [email protected]

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="wolof",
-    version="0.0.1",
+    version="0.0.3",
     author="Abdou Aziz DIOP",
     author_email="[email protected]",
     description="wolof is a python library for the Wolof language",

diff --git a/src/wolof/__init__.py b/src/wolof/__init__.py
@@ -0,0 +1,3 @@
+from .asr import * 
+
+from .model import *
diff --git a/src/wolof/asr.py b/src/wolof/asr.py
@@ -5,22 +5,50 @@
 
 
 class Speech2Text:
-
+    """
+    Speech2Text class for Speech Recognition in Wolof language
+    """
     def __init__(self, model_name="abdouaziiz/wav2vec2-xls-r-300m-wolof"):
+        """
+        Load the pretrained CTC model and its matching processor.
+
+        Args:
+            model_name (str): Name passed to both ``from_pretrained``
+                calls (model and processor)
+        """
         self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
         self.processor = Wav2Vec2Processor.from_pretrained(model_name)
 
     def wav2feature(self, path):
-
+        """
+        Load an audio file at 16 kHz and convert it to model input values.
+
+        Args:
+            path (str): The path to the wav file
+
+        Returns:
+            torch.Tensor: The processor's ``input_values`` (``return_tensors="pt"``)
+        """
+
         speech_array, sampling_rate = librosa.load(path , sr=16000) 
         return self.processor(speech_array, sampling_rate=sampling_rate, padding=True ,return_tensors="pt" ).input_values
 
     def feature2logits(self, features):
+        """
+        Run the model on the input values with gradient tracking disabled.
+
+        Args:
+            features (torch.Tensor): The input values fed to the model
+
+        Returns:
+            torch.Tensor: The ``logits`` attribute of the model output
+        """
         with torch.no_grad():
             return self.model(features).logits
 
     def __call__(self, path):
-
+
+
         logits = self.feature2logits(self.wav2feature(path))
         pred_ids = torch.argmax(logits, dim=-1)
 

diff --git a/src/wolof/model.py b/src/wolof/model.py
@@ -0,0 +1,25 @@
+
+from transformers import pipeline
+
+
+
+
+class Pipeline(object):
+    """
+    Thin wrapper around ``transformers.pipeline`` that defaults to a Wolof BERT model.
+    Example ``task`` values are 'fill-mask' and 'sentiment-analysis'.
+    """
+    def __init__(self, task , model_name="abdouaziiz/bert-base-wolof"):
+        """
+        Build the underlying pipeline for the given task and model.
+        Args:
+            task (str): The task name forwarded to ``transformers.pipeline``
+            model_name (str): The name of the model to load
+        """
+        self.task = task
+        self.model_name = model_name
+        self.pipe = pipeline(self.task, model=self.model_name)
+
+    def __call__(self, text):
+        """Run the wrapped pipeline on ``text`` and return its result unchanged."""
+        return self.pipe(text)