From 05b4632d3bd80bab7ca1f1a47667c81686d1f60e Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Sat, 25 Nov 2023 03:15:26 +0100 Subject: [PATCH 01/16] [add] updates to readme --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f42449d1..9316a822 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# NSL2AUDIOO +# NSL-2-AUDIO [![LICENSE](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE) [![Python](https://img.shields.io/badge/python-3.10-blue.svg?style=flat-square)](https://www.python.org/) @@ -52,13 +52,13 @@ $ make setup $ source $(poetry env info --path)/bin/activate ``` -## Project Roadmap +### Project Roadmap Here's a glimpse of the exciting features we plan to implement in the coming weeks: - - -## Citation +- [x] Add project's documentation (you are reading it now), create issues and milestones, setup document's stub, suggest + page layout and styling. +- [] Develop a Proof of Concept System ## Acknowledgments From 2e7ac2848dd82b10a68d472e009e4f448f7df227 Mon Sep 17 00:00:00 2001 From: Ipadeola Ladipo Ezekiel Date: Tue, 28 Nov 2023 23:37:24 +0000 Subject: [PATCH 02/16] [add] updates --- linguify_yb/src/models/baseline_transfomer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/linguify_yb/src/models/baseline_transfomer.py b/linguify_yb/src/models/baseline_transfomer.py index 4a7120e3..dafcc488 100644 --- a/linguify_yb/src/models/baseline_transfomer.py +++ b/linguify_yb/src/models/baseline_transfomer.py @@ -175,5 +175,6 @@ def generate(self, source, target_start_token_idx): dec_input = torch.cat([dec_input, last_logit], dim=-1) return dec_input + def build_model(): pass From a874fd3708de027c8f82aed3bbdcd5141ba82155 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Sat, 9 Dec 2023 04:20:18 +0100 Subject: [PATCH 03/16] [add] updates --- README.md | 4 - linguify_yb/README.md | 19 +- linguify_yb/development/dev.ipynb | 340 ++---------------- linguify_yb/src/dataset/dataset.py | 13 +- linguify_yb/src/models/baseline_transfomer.py | 162 ++++++--- linguify_yb/src/train.py | 2 +- linguify_yb/tests/test_data_ingestion.py | 0 linguify_yb/tests/test_model.py | 0 linguify_yb/tests/test_pipeline.py | 0 9 files changed, 164 insertions(+), 376 deletions(-) create mode 100644 linguify_yb/tests/test_data_ingestion.py create mode 100644 linguify_yb/tests/test_model.py create mode 100644 linguify_yb/tests/test_pipeline.py diff --git a/README.md b/README.md index 9316a822..12455609 100644 --- a/README.md +++ b/README.md @@ -68,10 +68,6 @@ I would like to acknowledge the outstanding contributions of : **Email:** **GitHub:** [@tejuafonja](https://github.com/tejuafonja) -**Name:** Fola Animashaun ***(```Mentor```)*** -**Email:** -**GitHub:** [@Modinat-A](https://github.com/Modinat-A) - ## Contact **Name:** **Ipadeola Ezekiel Ladipo** diff --git a/linguify_yb/README.md b/linguify_yb/README.md index cc375adc..a5a84d35 100644 --- a/linguify_yb/README.md +++ b/linguify_yb/README.md @@ -10,4 +10,21 @@ ***Overview:*** \ -# Project Roadmap +## Project Roadmap + +- **[Month Year]:** Project Initiation +- **[Month Year]:** Core Functionality Completion +- **[Month Year]:** User Interface Design Completion +- **[Month Year]:** Data Integration Completion +- **[Month Year]:** Testing and Quality Assurance Completion +- **[Month Year]:** Deployment to Production + +## How to Contribute + +We welcome contributions from the community. 
If you're interested in contributing, please refer to the [Contributing Guidelines](CONTRIBUTING.md). + +## Support and Contact + +If you have questions or need assistance, feel free to reach out to [Your Contact Information]. + +--- diff --git a/linguify_yb/development/dev.ipynb b/linguify_yb/development/dev.ipynb index 04ed67f6..273dbbb5 100644 --- a/linguify_yb/development/dev.ipynb +++ b/linguify_yb/development/dev.ipynb @@ -63,369 +63,78 @@ "import torch\n", "from torch import nn\n", "import os\n", - "from torchprofile import profile_macs" + "#from torchprofile import profile_macs" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ - "src = torch.randn((10, 32, 5)) # (sequence_length, batch_size, input_dim)\n", - "tgt = torch.randn((20, 32, 5))" + "logits = torch.rand(64,64)" ] }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([10, 32, 5])" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "src.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "5" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "src.size()[-1]" - ] - }, - { - "cell_type": "code", - "execution_count": 9, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ - "\"\"\"doc\n", - "\"\"\"\n", - "\n", - "import torch\n", - "from torch import nn\n", - "\n", - "\n", - "class TokenEmbedding(nn.Module):\n", - " def __init__(self, number_vocab=1000, max_len=100, number_hidden=64):\n", - " super().__init__()\n", - " self.postional_embedding_layers = nn.Embedding(number_vocab, number_hidden)\n", - " self.embedding_layers = nn.Embedding(max_len, number_hidden)\n", - "\n", - " def forward(self, input_x):\n", - " max_len = input_x.size()[-1]\n", - " input_x = self.embedding_layers(input_x)\n", - " # Generate positions using torch.arange\n", - " positions = torch.arange(0, max_len)\n", - " positions = self.postional_embedding_layers(positions)\n", - " return input_x + positions\n", - "\n", - "\n", - "class LandmarkEmbedding(nn.Module):\n", - " def __init__(self, input_dim = None, number_hidden=64, max_len=100):\n", - " super().__init__()\n", - " self.conv1 = nn.Conv1d(\n", - " in_channels=input_dim,\n", - " out_channels=number_hidden,\n", - " kernel_size=11,\n", - " padding=\"same\",\n", - " stride=1,\n", - " )\n", - " self.conv2 = nn.Conv1d(\n", - " in_channels=number_hidden,\n", - " out_channels=number_hidden,\n", - " kernel_size=11,\n", - " padding=\"same\",\n", - " stride=1,\n", - " )\n", - " self.conv3 = nn.Conv1d(\n", - " in_channels=number_hidden,\n", - " out_channels=number_hidden,\n", - " kernel_size=11,\n", - " padding=\"same\",\n", - " stride=1,\n", - " )\n", - " self.postions_embedding_layers = nn.Embedding(max_len, number_hidden)\n", - " self.seq_nn = nn.Sequential(\n", - " self.conv1, nn.ReLU(), self.conv2, nn.ReLU(), self.conv3, nn.ReLU()\n", - " )\n", - "\n", - " def forward(self, input_x):\n", - " outputs = self.seq_nn(input_x)\n", - " return outputs\n", - "\n", - "\n", - "class Transformer(nn.Module):\n", - " def __init__(\n", - " self,\n", - " input_dim,\n", - " output_dim,\n", - " source_maxlen=100,\n", - " target_maxlen=100,\n", - " no_multi_heads=6,\n", - " ):\n", - " super().__init__()\n", - " num_encoder_layers = num_decoder_layers = 6\n", - " 
encoder_forward_dim = 100\n", - " # Define encoder and decoder layers\n", - " self.encoder_layer = nn.TransformerEncoderLayer(\n", - " d_model=input_dim,\n", - " nhead=no_multi_heads,\n", - " dim_feedforward=encoder_forward_dim,\n", - " activation=\"relu\",\n", - " )\n", - "\n", - " self.decoder_layer = nn.TransformerDecoderLayer(\n", - " d_model=input_dim,\n", - " nhead=no_multi_heads,\n", - " dim_feedforward=output_dim,\n", - " activation=\"relu\",\n", - " )\n", - "\n", - " # Define encoder and decoder\n", - " self.transformer_encoder = nn.TransformerEncoder(\n", - " self.encoder_layer, num_layers=num_encoder_layers\n", - " )\n", - " self.transformer_decoder = nn.TransformerDecoder(\n", - " self.decoder_layer, num_layers=num_decoder_layers\n", - " )\n", - "\n", - " # Input and output linear layers\n", - " self.input_linear = LandmarkEmbedding(input_dim=input_dim,max_len=source_maxlen)\n", - " self.target_linear = TokenEmbedding(max_len=target_maxlen)\n", - " self.num_classes = 60\n", - " self.output_linear = nn.Linear(output_dim, self.num_classes)\n", - "\n", - " def forward(self, input_x, input_y):\n", - " # Apply EMbedding\n", - " input_x = self.input_linear(input_x)\n", - "\n", - " # Transformer encoding\n", - " memory = self.transformer_encoder(input_x)\n", - "\n", - " # Apply linear layer to the target\n", - " input_y = self.target_linear(input_y)\n", - "\n", - " # Transformer decoding\n", - " output = self.transformer_decoder(input_y, memory)\n", - "\n", - " # Apply linear layer to the output\n", - " output = self.output_linear(output)\n", - "\n", - " return output\n", - "\n", - " # TODO code generate for inference\n", - " def generate(\n", - " self,\n", - " ):\n", - " pass\n" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/transformer.py:282: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.self_attn.batch_first was not True(use batch_first for better inference performance)\n", - " warnings.warn(f\"enable_nested_tensor is True, but self.use_nested_tensor is False because {why_not_sparsity_fast_path}\")\n" - ] - }, - { - "ename": "IndexError", - "evalue": "index out of range in self", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/workspace/Cohort8-Ransom-Kuti-Ladipo/linguify_yb/development/dev.ipynb Cell 7\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 14\u001b[0m tgt \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mrandn((batch_size, input_dim, input_dim))\u001b[39m.\u001b[39mlong() \u001b[39m# (sequence_length, batch_size, input_dim)\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[39m# Forward pass\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m output \u001b[39m=\u001b[39m model(src, tgt)\n\u001b[1;32m 19\u001b[0m \u001b[39m# Print the output shape\u001b[39;00m\n\u001b[1;32m 20\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mOutput shape:\u001b[39m\u001b[39m\"\u001b[39m, output\u001b[39m.\u001b[39mshape)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m 
\u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_compiled_call_impl(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 1518\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call_impl(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1523\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1524\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_pre_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1525\u001b[0m \u001b[39mor\u001b[39;00m _global_backward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1526\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1529\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1530\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", - "\u001b[1;32m/workspace/Cohort8-Ransom-Kuti-Ladipo/linguify_yb/development/dev.ipynb Cell 7\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 103\u001b[0m memory \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransformer_encoder(input_x)\n\u001b[1;32m 105\u001b[0m \u001b[39m# Apply linear layer to the target\u001b[39;00m\n\u001b[0;32m--> 106\u001b[0m input_y \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtarget_linear(input_y)\n\u001b[1;32m 108\u001b[0m \u001b[39m# Transformer decoding\u001b[39;00m\n\u001b[1;32m 109\u001b[0m output \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransformer_decoder(input_y, memory)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_compiled_call_impl(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 1518\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call_impl(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[39m# If we don't have 
any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1523\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1524\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_pre_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1525\u001b[0m \u001b[39mor\u001b[39;00m _global_backward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1526\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1529\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1530\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", - "\u001b[1;32m/workspace/Cohort8-Ransom-Kuti-Ladipo/linguify_yb/development/dev.ipynb Cell 7\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 14\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mforward\u001b[39m(\u001b[39mself\u001b[39m, input_x):\n\u001b[1;32m 15\u001b[0m max_len \u001b[39m=\u001b[39m input_x\u001b[39m.\u001b[39msize()[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n\u001b[0;32m---> 16\u001b[0m input_x \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49membedding_layers(input_x)\n\u001b[1;32m 17\u001b[0m \u001b[39m# Generate positions using torch.arange\u001b[39;00m\n\u001b[1;32m 18\u001b[0m positions \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39marange(\u001b[39m0\u001b[39m, max_len)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_compiled_call_impl(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 1518\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call_impl(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1523\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1524\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_pre_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1525\u001b[0m \u001b[39mor\u001b[39;00m _global_backward_pre_hooks \u001b[39mor\u001b[39;00m 
_global_backward_hooks\n\u001b[1;32m 1526\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1529\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1530\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/sparse.py:162\u001b[0m, in \u001b[0;36mEmbedding.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mforward\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39minput\u001b[39m: Tensor) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tensor:\n\u001b[0;32m--> 162\u001b[0m \u001b[39mreturn\u001b[39;00m F\u001b[39m.\u001b[39;49membedding(\n\u001b[1;32m 163\u001b[0m \u001b[39minput\u001b[39;49m, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mweight, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpadding_idx, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmax_norm,\n\u001b[1;32m 164\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mnorm_type, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mscale_grad_by_freq, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msparse)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/functional.py:2233\u001b[0m, in \u001b[0;36membedding\u001b[0;34m(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)\u001b[0m\n\u001b[1;32m 2227\u001b[0m \u001b[39m# Note [embedding_renorm set_grad_enabled]\u001b[39;00m\n\u001b[1;32m 2228\u001b[0m \u001b[39m# XXX: equivalent to\u001b[39;00m\n\u001b[1;32m 2229\u001b[0m \u001b[39m# with torch.no_grad():\u001b[39;00m\n\u001b[1;32m 2230\u001b[0m \u001b[39m# torch.embedding_renorm_\u001b[39;00m\n\u001b[1;32m 2231\u001b[0m \u001b[39m# remove once script supports set_grad_enabled\u001b[39;00m\n\u001b[1;32m 2232\u001b[0m _no_grad_embedding_renorm_(weight, \u001b[39minput\u001b[39m, max_norm, norm_type)\n\u001b[0;32m-> 2233\u001b[0m \u001b[39mreturn\u001b[39;00m torch\u001b[39m.\u001b[39;49membedding(weight, \u001b[39minput\u001b[39;49m, padding_idx, scale_grad_by_freq, sparse)\n", - "\u001b[0;31mIndexError\u001b[0m: index out of range in self" - ] - } - ], - "source": [ - "#RuntimeError: Given groups=1, weight of size [11, 64, 2], \n", - "#expected input[100, 32, 513] to have 64 channels, but got 32 channels instead\n", - "# Example usage:\n", - "input_dim = 513 # Adjust based on your input dimension\n", - "output_dim = 256 # Adjust based on your output dimension\n", - "nhead = 9\n", - "batch_size = 16\n", - "sequnce = 100\n", - "# Instantiate the model\n", - "model = Transformer(input_dim, output_dim,no_multi_heads=nhead)\n", - "\n", - "# Create dummy input\n", - "src = torch.randn((batch_size, input_dim, input_dim)) # (sequence_length, batch_size, input_dim)\n", - "tgt = torch.randn((batch_size, input_dim, input_dim)).long() # (sequence_length, batch_size, input_dim)\n", - "\n", - "# Forward pass\n", - "output = model(src, tgt)\n", - "\n", - "# Print the output shape\n", - "print(\"Output shape:\", output.shape)" + "dec_logits = []\n", + "dec_input = (torch.ones((1), dtype=torch.long)* 60)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "No 
error, the configuration is valid.\n" - ] - } - ], - "source": [ - "input_dim = 513\n", - "num_heads = 9\n", - "\n", - "# Check if embed_dim is divisible by num_heads\n", - "if input_dim % num_heads != 0:\n", - " print(\"Error: embed_dim must be divisible by num_heads\")\n", - "else:\n", - " print(\"No error, the configuration is valid.\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", - " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,\n", - " 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,\n", - " 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,\n", - " 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,\n", - " 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "positions = torch.arange(0, 100)\n", - "positions" - ] - }, - { - "cell_type": "code", - "execution_count": 26, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ - "par = ['filepat','file2', 'file3']\n", - "id = [1,2,3,]\n", - "assert len(id)== len(par), 'failed'" + "logits = torch.argmax(logits, dim=-1, keepdim=True) \n", + "last_logit = logits[:, -1]\n", + "dec_logits.append(last_logit)\n", + "dec_input = torch.cat([dec_input, last_logit], dim=-1)" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[('filepat', 1), ('file2', 2), ('file3', 3)]" + "torch.Size([64])" ] }, - "execution_count": 27, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ans = list(zip(par,id))\n", - "ans" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "filepat\n", - "1\n", - "file2\n", - "2\n", - "file3\n", - "3\n" - ] - } - ], - "source": [ - "for nu , id in ans:\n", - " print(nu)\n", - " print(id)" + "last_logit.shape" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'f'" + "torch.Size([65])" ] }, - "execution_count": 30, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "nu[0]" + "dec_input.shape" ] }, { @@ -433,7 +142,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "for idx in preds[i, :]:\n", + " prediction += self.idx_to_char[idx]\n", + " if idx == 60:\n", + " break\n", + "print(f\"target: {target_text.replace('-','')}\")\n", + "print(f\"prediction: {prediction}\\n\")" + ] } ], "metadata": { @@ -452,7 +168,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.2" } }, "nbformat": 4, diff --git a/linguify_yb/src/dataset/dataset.py b/linguify_yb/src/dataset/dataset.py index 2f31f4ce..e3b85bd3 100644 --- a/linguify_yb/src/dataset/dataset.py +++ b/linguify_yb/src/dataset/dataset.py @@ -10,8 +10,7 @@ from torch.nn import functional as F from torch.utils.data import DataLoader, Dataset -from linguify_yb.src.dataset.frames_config import (FEATURE_COLUMNS, LHAND_IDX, - RHAND_IDX) +from linguify_yb.src.dataset.frames_config import FEATURE_COLUMNS, LHAND_IDX, RHAND_IDX from linguify_yb.src.dataset.preprocess 
import frames_preprocess PHRASE_PATH = "data/asl-fingerspelling/character_to_prediction_index.json" @@ -94,7 +93,7 @@ def __len__(self): def __getitem__(self, idx): if torch.is_tensor(idx): idx = idx.tolist() - phrase = self.labels[idx] + phrase = self.labels[idx] frames = self.frames[idx] if self.trans: @@ -102,6 +101,7 @@ def __getitem__(self, idx): frames = frames_preprocess(frames) return frames, phrase + def pack_collate_func(batch): frames_feature = [item[0] for item in batch] phrase = [item[1] for item in batch] @@ -121,9 +121,9 @@ def get_dataloader(file_path, file_id, batch_size): return dataloader - # For Debugging Train Pipeline + class TestDataset(Dataset): def __init__(self, num_samples=1000, input_size=10): self.num_samples = num_samples @@ -137,6 +137,9 @@ def __len__(self): def __getitem__(self, idx): return self.data[idx], self.labels[idx] + # Generating a dataset with 1000 samples and 10 input features testdataset = TestDataset(num_samples=1000, input_size=10) -TEST_LOADER = DataLoader(dataset=testdataset, batch_size=1, num_workers=2, pin_memory= True) \ No newline at end of file +TEST_LOADER = DataLoader( + dataset=testdataset, batch_size=1, num_workers=2, pin_memory=True +) diff --git a/linguify_yb/src/models/baseline_transfomer.py b/linguify_yb/src/models/baseline_transfomer.py index dafcc488..543980cc 100644 --- a/linguify_yb/src/models/baseline_transfomer.py +++ b/linguify_yb/src/models/baseline_transfomer.py @@ -2,13 +2,13 @@ """ import torch -import torch.nn as nn +from torch import nn import torch.nn.functional as F class TokenEmbedding(nn.Module): def __init__(self, num_vocab=1000, maxlen=100, num_hid=64): - super(TokenEmbedding, self).__init__() + super().__init__() self.emb = nn.Embedding(num_vocab, num_hid) self.pos_emb = nn.Embedding(maxlen, num_hid) @@ -22,21 +22,59 @@ def forward(self, x): class LandmarkEmbedding(nn.Module): def __init__(self, num_hid=64, maxlen=100): - super(LandmarkEmbedding, self).__init__() - self.conv1 = nn.Conv1d(num_hid, 11, stride=2, padding="same") - self.conv2 = nn.Conv1d(num_hid, 11, stride=2, padding="same") - self.conv3 = nn.Conv1d(num_hid, 11, stride=2, padding="same") + super().__init__() + # Calculate the padding for "same" padding + self.padding = (11 - 1) // 2 + self.output_embedding_dim = num_hid + self.conv1 = nn.Conv1d( + in_channels=1, + out_channels=64, + kernel_size=11, + stride=1, + padding=self.padding, + ) + self.conv2 = nn.Conv1d( + in_channels=64, + out_channels=128, + kernel_size=11, + stride=1, + padding=self.padding, + ) + self.conv3 = nn.Conv1d( + in_channels=128, + out_channels=256, + kernel_size=11, + stride=1, + padding=self.padding, + ) self.pos_emb = nn.Embedding(maxlen, num_hid) + self.embedding_layer = nn.Linear(256 * 345, self.output_embedding_dim) def forward(self, x): + # Input x should have shape (batch_size, input_size) + x = x.unsqueeze(1) # Add a channel dimension for 1D convolution x = F.relu(self.conv1(x)) x = F.relu(self.conv2(x)) - return F.relu(self.conv3(x)) + x = F.relu(self.conv3(x)) + + # Flatten the output before passing through the linear embedding layer + x = x.view(x.size(0), -1) + + # Apply the linear embedding layer + x = self.embedding_layer(x) + + return x class TransformerEncoder(nn.Module): - def __init__(self, embed_dim, num_heads, feed_forward_dim, rate=0.1): - super(TransformerEncoder, self).__init__() + def __init__( + self, + embed_dim, + num_heads, + feed_forward_dim, + rate=0.1, + ): + super().__init__() self.att = nn.MultiheadAttention(embed_dim, num_heads) self.ffn = 
nn.Sequential( nn.Linear(embed_dim, feed_forward_dim), @@ -49,7 +87,7 @@ def __init__(self, embed_dim, num_heads, feed_forward_dim, rate=0.1): self.dropout1 = nn.Dropout(rate) self.dropout2 = nn.Dropout(rate) - def forward(self, inputs, training): + def forward(self, inputs): attn_out, _ = self.att(inputs, inputs, inputs) attn_out = self.dropout1(attn_out) out1 = self.layernorm1(inputs + attn_out) @@ -61,7 +99,7 @@ def forward(self, inputs, training): class TransformerDecoder(nn.Module): def __init__(self, embed_dim, num_heads, feed_forward_dim, dropout_rate=0.1): - super(TransformerDecoder, self).__init__() + super().__init__() self.layernorm1 = nn.LayerNorm(embed_dim, eps=1e-6) self.layernorm2 = nn.LayerNorm(embed_dim, eps=1e-6) self.layernorm3 = nn.LayerNorm(embed_dim, eps=1e-6) @@ -76,29 +114,32 @@ def __init__(self, embed_dim, num_heads, feed_forward_dim, dropout_rate=0.1): nn.Linear(feed_forward_dim, embed_dim), ) - def causal_attention_mask(self, batch_size, n_dest, n_src, dtype): - i = torch.arange(n_dest)[:, None] - j = torch.arange(n_src) - m = i >= j - n_src + n_dest - mask = m.to(dtype) - mask = mask.view(1, n_dest, n_src) - mult = torch.cat( - [batch_size[..., None], torch.tensor([1, 1], dtype=torch.int32)], 0 + def causal_attention_mask( + self, sequence_length, batch_size=1, num_heads=8, device="cpu" + ): + mask = torch.triu(torch.ones(sequence_length, sequence_length), diagonal=1).to( + device ) - return mask.repeat(mult) - - def forward(self, enc_out, target, training): - input_shape = target.size() - batch_size = input_shape[0] - seq_len = input_shape[1] - causal_mask = self.causal_attention_mask( - batch_size, seq_len, seq_len, torch.bool + mask = mask.unsqueeze(0).expand( + batch_size * num_heads, sequence_length, sequence_length ) + return mask - target_att = self.self_att(target, target, target, attn_mask=causal_mask) - target_norm = self.layernorm1(target + self.self_dropout(target_att)) + def forward( + self, + src_target_, + enc_out, + ): + input_shape = src_target_.size() + batch_size = 1 # input_shape[0] + seq_len = input_shape[0] + mask = self.causal_attention_mask(seq_len, batch_size=batch_size) + target_att, _ = self.self_att( + src_target_, src_target_, src_target_, attn_mask=mask + ) + target_norm = self.layernorm1(src_target_ + self.self_dropout(target_att)) - enc_out = self.enc_att(target_norm, enc_out, enc_out) + enc_out, _ = self.enc_att(target_norm, enc_out, enc_out) enc_out_norm = self.layernorm2(enc_out + self.enc_dropout(enc_out)) ffn_out = self.ffn(enc_out_norm) @@ -107,27 +148,26 @@ def forward(self, enc_out, target, training): return ffn_out_norm -class Transformer(nn.Module): +class NTransformer(nn.Module): def __init__( self, num_hid=64, - num_head=2, + num_head=8, num_feed_forward=128, source_maxlen=100, target_maxlen=100, num_layers_enc=4, num_layers_dec=1, - num_classes=60, ): - super(Transformer, self).__init__() + super().__init__() self.num_layers_enc = num_layers_enc self.num_layers_dec = num_layers_dec self.target_maxlen = target_maxlen - self.num_classes = num_classes + self.num_classes = 64 self.enc_input = LandmarkEmbedding(num_hid=num_hid, maxlen=source_maxlen) self.dec_input = TokenEmbedding( - num_vocab=num_classes, maxlen=target_maxlen, num_hid=num_hid + num_vocab=64, ) self.encoder = nn.Sequential( @@ -144,37 +184,53 @@ def __init__( TransformerDecoder(num_hid, num_head, num_feed_forward), ) - self.classifier = nn.Linear(num_hid, num_classes) - - def decode(self, enc_out, target, training): - y = self.dec_input(target) - for i in 
range(self.num_layers_dec): - y = getattr(self, f"dec_layer_{i}")(enc_out, y, training) - return y + self.classifier = nn.Linear(num_hid, self.num_classes) - def forward(self, inputs, training): + def forward(self, inputs): source, target = inputs x = self.encoder(source) - y = self.decode(x, target, training) + y = self.decode(x, target) return self.classifier(y) - def generate(self, source, target_start_token_idx): + def decode(self, enc_out, target): + y = self.dec_input(target) + for i in range(self.num_layers_dec): + y = getattr(self, f"dec_layer_{i}")( + enc_out, + y, + ) + return y + + def generate(self, source, target_start_token_idx=60): + """Performs inference over one batch of inputs using greedy decoding + + Parameters + ---------- + source : _type_ + _description_ + target_start_token_idx : _type_ + _description_ + + Returns + ------- + _type_ + _description_ + """ bs = source.size(0) enc = self.encoder(source) dec_input = ( - torch.ones((bs, 1), dtype=torch.long).to(source.device) - * target_start_token_idx + torch.ones((1), dtype=torch.long).to(source.device) * target_start_token_idx ) dec_logits = [] + counter = 0 for i in range(self.target_maxlen - 1): - dec_out = self.decode(enc, dec_input, training=False) + dec_out = self.decode(enc, dec_input) logits = self.classifier(dec_out) logits = torch.argmax(logits, dim=-1, keepdim=True) last_logit = logits[:, -1] dec_logits.append(last_logit) dec_input = torch.cat([dec_input, last_logit], dim=-1) + counter += 1 + if counter > 2: + break return dec_input - - -def build_model(): - pass diff --git a/linguify_yb/src/train.py b/linguify_yb/src/train.py index b614e206..cdec6f38 100644 --- a/linguify_yb/src/train.py +++ b/linguify_yb/src/train.py @@ -6,7 +6,7 @@ # --epochs 10 \ # --batch 512 \ """ -# TODO Complete and refactor code +# TODO Complete and refactor code for distributed training import os import json diff --git a/linguify_yb/tests/test_data_ingestion.py b/linguify_yb/tests/test_data_ingestion.py new file mode 100644 index 00000000..e69de29b diff --git a/linguify_yb/tests/test_model.py b/linguify_yb/tests/test_model.py new file mode 100644 index 00000000..e69de29b diff --git a/linguify_yb/tests/test_pipeline.py b/linguify_yb/tests/test_pipeline.py new file mode 100644 index 00000000..e69de29b From 2316deda7afdace08a53fd5cd3f56c4dd054604c Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Sun, 10 Dec 2023 21:08:28 +0100 Subject: [PATCH 04/16] [add] updates --- ..._transfomer.py => baseline_transformer.py} | 84 ++++++++----------- 1 file changed, 37 insertions(+), 47 deletions(-) rename linguify_yb/src/models/{baseline_transfomer.py => baseline_transformer.py} (78%) diff --git a/linguify_yb/src/models/baseline_transfomer.py b/linguify_yb/src/models/baseline_transformer.py similarity index 78% rename from linguify_yb/src/models/baseline_transfomer.py rename to linguify_yb/src/models/baseline_transformer.py index 543980cc..02d76856 100644 --- a/linguify_yb/src/models/baseline_transfomer.py +++ b/linguify_yb/src/models/baseline_transformer.py @@ -3,11 +3,10 @@ import torch from torch import nn -import torch.nn.functional as F class TokenEmbedding(nn.Module): - def __init__(self, num_vocab=1000, maxlen=100, num_hid=64): + def __init__(self, num_vocab=1000, maxlen=100, num_hid=200): super().__init__() self.emb = nn.Embedding(num_vocab, num_hid) self.pos_emb = nn.Embedding(maxlen, num_hid) @@ -24,41 +23,33 @@ class LandmarkEmbedding(nn.Module): def __init__(self, num_hid=64, maxlen=100): super().__init__() # Calculate the padding for 
"same" padding - self.padding = (11 - 1) // 2 - self.output_embedding_dim = num_hid + padding = (11 - 1) // 2 + + # Define three 1D convolutional layers with ReLU activation and stride 2 self.conv1 = nn.Conv1d( - in_channels=1, - out_channels=64, - kernel_size=11, - stride=1, - padding=self.padding, + in_channels=1, out_channels=64, kernel_size=11, stride=2, padding=padding ) self.conv2 = nn.Conv1d( - in_channels=64, - out_channels=128, - kernel_size=11, - stride=1, - padding=self.padding, + in_channels=64, out_channels=128, kernel_size=11, stride=2, padding=padding ) self.conv3 = nn.Conv1d( - in_channels=128, - out_channels=256, - kernel_size=11, - stride=1, - padding=self.padding, + in_channels=128, out_channels=256, kernel_size=11, stride=2, padding=padding ) - self.pos_emb = nn.Embedding(maxlen, num_hid) - self.embedding_layer = nn.Linear(256 * 345, self.output_embedding_dim) + + # Output embedding layer + self.embedding_layer = nn.Linear(256, num_hid) def forward(self, x): # Input x should have shape (batch_size, input_size) x = x.unsqueeze(1) # Add a channel dimension for 1D convolution - x = F.relu(self.conv1(x)) - x = F.relu(self.conv2(x)) - x = F.relu(self.conv3(x)) - # Flatten the output before passing through the linear embedding layer - x = x.view(x.size(0), -1) + # Apply convolutional layers with ReLU activation and stride 2 + x = torch.relu(self.conv1(x)) + x = torch.relu(self.conv2(x)) + x = torch.relu(self.conv3(x)) + + # Global average pooling to reduce spatial dimensions + x = torch.mean(x, dim=2) # Apply the linear embedding layer x = self.embedding_layer(x) @@ -94,7 +85,9 @@ def forward(self, inputs): ffn_out = self.ffn(out1) ffn_out = self.dropout2(ffn_out) - return self.layernorm2(out1 + ffn_out) + x = self.layernorm2(out1 + ffn_out) + print(f"endocder{x.shape}") + return x class TransformerDecoder(nn.Module): @@ -115,7 +108,7 @@ def __init__(self, embed_dim, num_heads, feed_forward_dim, dropout_rate=0.1): ) def causal_attention_mask( - self, sequence_length, batch_size=1, num_heads=8, device="cpu" + self, sequence_length, batch_size=1, num_heads=4, device="cpu" ): mask = torch.triu(torch.ones(sequence_length, sequence_length), diagonal=1).to( device @@ -127,8 +120,8 @@ def causal_attention_mask( def forward( self, - src_target_, enc_out, + src_target_, ): input_shape = src_target_.size() batch_size = 1 # input_shape[0] @@ -144,7 +137,7 @@ def forward( ffn_out = self.ffn(enc_out_norm) ffn_out_norm = self.layernorm3(enc_out_norm + self.ffn_dropout(ffn_out)) - + print(f"decoder - {ffn_out_norm.shape}") return ffn_out_norm @@ -157,17 +150,17 @@ def __init__( source_maxlen=100, target_maxlen=100, num_layers_enc=4, - num_layers_dec=1, + num_layers_dec=4, ): super().__init__() self.num_layers_enc = num_layers_enc self.num_layers_dec = num_layers_dec self.target_maxlen = target_maxlen - self.num_classes = 64 + self.num_classes = 62 self.enc_input = LandmarkEmbedding(num_hid=num_hid, maxlen=source_maxlen) self.dec_input = TokenEmbedding( - num_vocab=64, + num_vocab=self.num_classes, maxlen=target_maxlen ) self.encoder = nn.Sequential( @@ -186,19 +179,19 @@ def __init__( self.classifier = nn.Linear(num_hid, self.num_classes) - def forward(self, inputs): - source, target = inputs + def forward(self, source, target): x = self.encoder(source) - y = self.decode(x, target) + y = self.decoder_run(x, target) + print(y.shape) return self.classifier(y) - def decode(self, enc_out, target): + def decoder_run(self, enc_out, target): + print(f"before emb {target.shape}") y = 
self.dec_input(target) + print(f"after emb {y.shape}") + for i in range(self.num_layers_dec): - y = getattr(self, f"dec_layer_{i}")( - enc_out, - y, - ) + y = getattr(self, f"dec_layer_{i}")(enc_out, y) return y def generate(self, source, target_start_token_idx=60): @@ -222,15 +215,12 @@ def generate(self, source, target_start_token_idx=60): torch.ones((1), dtype=torch.long).to(source.device) * target_start_token_idx ) dec_logits = [] - counter = 0 for i in range(self.target_maxlen - 1): - dec_out = self.decode(enc, dec_input) + dec_out = self.decoder_run(enc, dec_input) logits = self.classifier(dec_out) + logits = torch.argmax(logits, dim=-1, keepdim=True) - last_logit = logits[:, -1] + last_logit = logits[-1] dec_logits.append(last_logit) dec_input = torch.cat([dec_input, last_logit], dim=-1) - counter += 1 - if counter > 2: - break return dec_input From 4a3fd6546e7a0698706db4f70a3a3cb64af9d01a Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 02:45:15 +0100 Subject: [PATCH 05/16] [add] mics --- linguify_yb/src/dataset/__init__.py | 0 linguify_yb/src/dataset/dataset.py | 145 -------------- linguify_yb/src/dataset/subsample_data.py | 124 ------------ linguify_yb/{ => src/tests}/__init__.py | 0 linguify_yb/src/train.py | 221 ---------------------- linguify_yb/src/utils/__init__.py | 74 -------- linguify_yb/src/utils/benchmark,py | 50 ----- linguify_yb/tests/test_data_ingestion.py | 0 linguify_yb/tests/test_model.py | 0 linguify_yb/tests/test_pipeline.py | 0 10 files changed, 614 deletions(-) delete mode 100644 linguify_yb/src/dataset/__init__.py delete mode 100644 linguify_yb/src/dataset/dataset.py delete mode 100644 linguify_yb/src/dataset/subsample_data.py rename linguify_yb/{ => src/tests}/__init__.py (100%) delete mode 100644 linguify_yb/src/train.py delete mode 100644 linguify_yb/src/utils/__init__.py delete mode 100644 linguify_yb/src/utils/benchmark,py delete mode 100644 linguify_yb/tests/test_data_ingestion.py delete mode 100644 linguify_yb/tests/test_model.py delete mode 100644 linguify_yb/tests/test_pipeline.py diff --git a/linguify_yb/src/dataset/__init__.py b/linguify_yb/src/dataset/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/linguify_yb/src/dataset/dataset.py b/linguify_yb/src/dataset/dataset.py deleted file mode 100644 index e3b85bd3..00000000 --- a/linguify_yb/src/dataset/dataset.py +++ /dev/null @@ -1,145 +0,0 @@ -"""doc -""" - -import json - -import numpy as np -import pandas as pd -import pyarrow.parquet as pq -import torch -from torch.nn import functional as F -from torch.utils.data import DataLoader, Dataset - -from linguify_yb.src.dataset.frames_config import FEATURE_COLUMNS, LHAND_IDX, RHAND_IDX -from linguify_yb.src.dataset.preprocess import frames_preprocess - -PHRASE_PATH = "data/asl-fingerspelling/character_to_prediction_index.json" -METADATA = "data/asl-fingerspelling/train.csv" - -with open(PHRASE_PATH, "r", encoding="utf-8") as f: - character_to_num = json.load(f) - -PAD_TOKEN = "P" -START_TOKEN = "<" -END_TOKEN = ">" -PAD_TOKEN_IDX = 59 -START_TOKEN_IDX = 60 -END_TOKEN_IDX = 61 - -character_to_num[PAD_TOKEN] = PAD_TOKEN_IDX -character_to_num[START_TOKEN] = START_TOKEN_IDX -character_to_num[END_TOKEN] = END_TOKEN_IDX -num_to_character = {j: i for i, j in character_to_num.items()} - - -class TokenHashTable: - def __init__(self, word2index_mapping, index2word_mapping): - self.word2index = word2index_mapping - self.index2word = index2word_mapping - - def _indexesfromsentence(self, sentence): - return 
[self.word2index[word] for word in sentence] - - def tensorfromsentence(self, sentence): - indexes = self._indexesfromsentence(sentence) - return torch.tensor(indexes, dtype=torch.long) # .view(1, -1) - - -def read_file(file, file_id, landmarks_metadata_path): - phrase_list = [] - frames_list = [] - metadata_train_dataframe = pd.read_csv(landmarks_metadata_path) - file_id_df = metadata_train_dataframe.loc[ - metadata_train_dataframe["file_id"] == file_id - ] - saved_parueat_df = pq.read_table( - file, columns=["sequence_id"] + FEATURE_COLUMNS - ).to_pandas() - for seq_id, phrase in zip(file_id_df.sequence_id, file_id_df.phrase): - frames = saved_parueat_df[saved_parueat_df.index == seq_id].to_numpy() - # NaN - right_num_nan = np.sum(np.sum(np.isnan(frames[:, RHAND_IDX]), axis=1) == 0) - left_num_nan = np.sum(np.sum(np.isnan(frames[:, LHAND_IDX]), axis=1) == 0) - total_num_nan = max(right_num_nan, left_num_nan) - if 2 * len(phrase) < total_num_nan: - frames_list.append(frames) - phrase_list.append(phrase) - return (frames_list, phrase_list) - - -class LandmarkDataset(Dataset): - def __init__(self, file_path, file_id, table, transform=True): - self.landmarks_metadata_path = METADATA - self.frames, self.labels = read_file( - file_path, file_id, self.landmarks_metadata_path - ) - self.trans = transform - self.table = table - - def _label_pre(self, label_sample): - sample = START_TOKEN + label_sample + END_TOKEN - new_phrase = self.table.tensorfromsentence(list(sample)) - ans = F.pad( - input=new_phrase, - pad=[0, 64 - new_phrase.shape[0]], - mode="constant", - value=PAD_TOKEN_IDX, - ) - return ans - - def __len__(self): - return len(self.labels) - - def __getitem__(self, idx): - if torch.is_tensor(idx): - idx = idx.tolist() - phrase = self.labels[idx] - frames = self.frames[idx] - - if self.trans: - phrase = self._label_pre(phrase) - frames = frames_preprocess(frames) - return frames, phrase - - -def pack_collate_func(batch): - frames_feature = [item[0] for item in batch] - phrase = [item[1] for item in batch] - return [frames_feature, phrase] - - -def get_dataloader(file_path, file_id, batch_size): - lookup_table = TokenHashTable(character_to_num, num_to_character) - dataset = LandmarkDataset(file_path, file_id, lookup_table, transform=True) - - dataloader = DataLoader( - dataset, - batch_size=batch_size, - num_workers=2, - pin_memory=True, - ) - return dataloader - - -# For Debugging Train Pipeline - - -class TestDataset(Dataset): - def __init__(self, num_samples=1000, input_size=10): - self.num_samples = num_samples - self.input_size = input_size - self.data = torch.randn(num_samples, input_size) - self.labels = torch.randint(0, 2, (num_samples,)) - - def __len__(self): - return self.num_samples - - def __getitem__(self, idx): - return self.data[idx], self.labels[idx] - - -# Generating a dataset with 1000 samples and 10 input features -testdataset = TestDataset(num_samples=1000, input_size=10) -TEST_LOADER = DataLoader( - dataset=testdataset, batch_size=1, num_workers=2, pin_memory=True -) diff --git a/linguify_yb/src/dataset/subsample_data.py b/linguify_yb/src/dataset/subsample_data.py deleted file mode 100644 index 3935c2b1..00000000 --- a/linguify_yb/src/dataset/subsample_data.py +++ /dev/null @@ -1,124 +0,0 @@ -"""Dataset Download Module - -This module provides functions to download the a subsample of Google ASL dataset. - -Functions: -- download_dataset(url: str, destination: str, path): - Downloads a dataset from the given URL to the specified destination directory. 
-- main - the main function to run the script -""" - - -import os -import shutil -import subprocess -import zipfile - -from linguify_yb.src.utils.logger_util import logger - -DATA_DIR = "data/asl-fingerspelling/" -data_files = ["train.csv", "character_to_prediction_index.json"] -train_landmarks = ["1019715464.parquet", "1021040628.parquet", "105143404.parquet"] -TRAIN_LANDMARKS_DIR = "train_landmarks/" - -COMMAND = [ - "kaggle", - "competitions", - "download", - "-c", - "asl-fingerspelling", - "-f", - "FILE", - "-p", - "data/raw/", -] - - -def check_storage(project_dir=os.getcwd()): - """check and return availabe storage space - - Parameters - ---------- - directory_path : str, Path - current working directory/directory path - - Returns - ------- - int - the size of available storage space (GB) - - Raises - ------ - StorageFullError - exception for when storage is full. - """ - total, used, free = shutil.disk_usage(project_dir) - total_size_gb = round(total / (2**30), 2) - used_size_gb = round(used / (2**30), 2) - free_size_gb = round(free / (2**30), 2) - if used_size_gb / total_size_gb >= 0.8: - raise StorageFullError - return free_size_gb - - -class StorageFullError(Exception): - """Custom exception for when storage is full.""" - - pass - - -def downlaod_file(cmd, unzipped_file_path, data_dir): - """Download file using kaggle API - - Parameters - ---------- - cmd : list - Kaggle API Commands - unzipped_file : str, Path - path of the unzipped file - data_dir : str, Path - the directory where the data should be downloaded into - """ - subprocess.run(cmd, check=True, text=True) - if ( - os.path.exists(unzipped_file_path) - and os.path.splitext(unzipped_file_path)[1].lower() == ".zip" - ): - # Unzipping and delete the zipped file to free storage - with zipfile.ZipFile(unzipped_file_path, "r") as zip_ref: - zip_ref.extractall(data_dir) - os.remove(unzipped_file_path) - else: - pass - - -def main(): - """the main function to run the script""" - logger.info("Commencing downloading the dataset") - try: - logger.info(f"Current Available space {check_storage()}GB") - for file in data_files: - logger.info(f"Downloading{file} in {DATA_DIR}") - COMMAND[6] = file - unzipfile_path = DATA_DIR + file + ".zip" - downlaod_file(COMMAND, unzipfile_path, DATA_DIR) - logger.info(f" {file} downloaded succesful") - # Downloading the LANDMARKS files - for parquet_file in train_landmarks: - logger.info(f"Current Available space {check_storage()}GB") - file_path = TRAIN_LANDMARKS_DIR + parquet_file - COMMAND[6] = file_path - COMMAND[8] = DATA_DIR + TRAIN_LANDMARKS_DIR - unzipfile_path = DATA_DIR + file_path + ".zip" - downlaod_file(COMMAND, unzipfile_path, DATA_DIR + TRAIN_LANDMARKS_DIR) - logger.info(f"{parquet_file} downloaded succesfully") - - logger.success("All files downloaded succesfully") - - except Exception as error: - logger.error(f"failed due to {error}") - logger.exception("Data unloading was unsuccesfully") - - -if __name__ == "__main__": - main() diff --git a/linguify_yb/__init__.py b/linguify_yb/src/tests/__init__.py similarity index 100% rename from linguify_yb/__init__.py rename to linguify_yb/src/tests/__init__.py diff --git a/linguify_yb/src/train.py b/linguify_yb/src/train.py deleted file mode 100644 index cdec6f38..00000000 --- a/linguify_yb/src/train.py +++ /dev/null @@ -1,221 +0,0 @@ -""" -doc - -# Usage: -# python -m src/train.py \ -# --epochs 10 \ -# --batch 512 \ -""" -# TODO Complete and refactor code for distributed training - -import os -import json - -import numpy as np -import torch 
-import wandb -from torch import nn - -from linguify_yb.src.dataset.dataset import get_dataloader, TEST_LOADER -from linguify_yb.src.models.model_loader import ModelLoader -from linguify_yb.src.utils import get_device_strategy, parse_args, set_seed -from linguify_yb.src.utils.logger_util import logger - - -try: - dataset_paths = "dev_samples.json" # On kaggle replace with "dataset_paths.json" to train on full data - with open(dataset_paths, "r", encoding="utf-8") as json_file: - data_dict = json.load(json_file) - LANDMARK_DIR = "/kaggle/input/asl-fingerspelling/train_landmarks" - MODEL_DIR = "model.pt" - - # Training dataset - train_dataset = data_dict["train_files"] - train_file_ids = [os.path.basename(file) for file in train_dataset] - train_file_ids = [ - int(file_name.replace(".parquet", "")) for file_name in train_file_ids - ] - assert len(train_dataset) == len( - train_file_ids - ), "Failed import of Train files path " - TRAIN_DS_FILES = list(zip(train_dataset, train_file_ids)) - - # Validation dataset - valid_dataset = data_dict["valid_files"] - valid_file_ids = [os.path.basename(file) for file in valid_dataset] - valid_file_ids = [ - int(file_name.replace(".parquet", "")) for file_name in valid_file_ids - ] - assert len(train_dataset) == len( - train_file_ids - ), "Failed Import of Valid Files path" - VALID_DS_FILES = list(zip(valid_dataset, valid_file_ids)) -except AssertionError as asset_error: - logger.exception(f"failed {asset_error}") - - -def train(model, optim, loss_func, n_epochs, batch, device): - # To ensure reproducibility of the training process - set_seed() - train_losses = [] - val_losses = [] - val_dataloader = TEST_LOADER # get_dataloader(TRAIN_FILES[0][0], TRAIN_FILES[0][1], batch_size=batch) - - for epoch in range(n_epochs): - logger.info(f"Training on epoch {epoch}.") - total_epochs = epoch - file_train_loss = [] - for file, file_id in TRAIN_DS_FILES: - train_dataloader = ( - TEST_LOADER # get_dataloader(file, file_id, batch_size=batch) - ) - - # Performs training using mini-batches - train_loss = mini_batch( - model, train_dataloader, optim, loss_func, device, validation=False - ) - file_train_loss.append(train_loss) - train_loss = np.mean(file_train_loss) - train_losses.append(train_loss) - - # Performs evaluation using mini-batches - logger.info("Starting validation.") - with torch.no_grad(): - val_loss = mini_batch( - model, val_dataloader, optim, loss_func, device, validation=True - ) - val_losses.append(val_loss) - - wandb.log( - { - "train_loss": train_loss, - "val_loss": val_loss, - "epoch": epoch, - } - ) - - if epoch // 2 == 0: - logger.info("Initiating checkpoint. 
Saving model and optimizer states.") - save_checkpoint( - MODEL_DIR, model, optim, total_epochs, train_losses, val_losses - ) - - -def mini_batch( - model, dataloader, mini_batch_optim, loss_func, device, validation=False -): - # The mini-batch can be used with both loaders - # The argument `validation`defines which loader and - # corresponding step function is going to be used - if validation: - step_func = val_step_func(model, loss_func) - else: - step_func = train_step_func(model, mini_batch_optim, loss_func) - - # Once the data loader and step function, this is the same - # mini-batch loop we had before - mini_batch_losses = [] - for x_batch, y_batch in dataloader: - x_batch = x_batch.to(device) - y_batch = y_batch.to(device) - loss = step_func(x=x_batch, y=y_batch) - mini_batch_losses.append(loss) - loss = np.mean(mini_batch_losses) - return loss - - -def train_step_func(model, optim_, loss_func): - def perform_train_step_fn(x, y): - model.train() - preds = model(x) - loss = loss_func(preds, y) - loss.backward() - optim_.step() - optim_.zero_grad() - return loss.item() - - return perform_train_step_fn - - -def val_step_func(model, loss_func): - def perform_val_step_fn(x, y): - model.eval() - preds = model(x) - loss = loss_func(preds, y) - return loss.item() - - return perform_val_step_fn - - -def save_checkpoint(filename, model, optimizer, total_epochs, train_losses, val_losses): - # Builds dictionary with all elements for resuming training - checkpoint = { - "epoch": total_epochs, - "model_state_dict": model.state_dict(), - "optimizer_state_dict": optimizer.state_dict(), - "loss": train_losses, - "val_loss": val_losses, - } - - torch.save(checkpoint, filename) - - -def load_checkpoint(model, optimizer, filename): - # Loads dictionary - checkpoint = torch.load(filename) - - # Restore state for model and optimizer - model.load_state_dict(checkpoint["model_state_dict"]) - optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) - - total_epochs = checkpoint["epoch"] - losses = checkpoint["loss"] - val_losses = checkpoint["val_loss"] - return model - - -def main(arg): - logger.info(f"Starting training on {arg.model}") - - DEVICE = get_device_strategy(tpu=arg.tpu) - logger.info(f"Training on {DEVICE} for {arg.epochs} epochs.") - - model = ModelLoader().get_model(arg.model) - - optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9) - criterion = nn.CrossEntropyLoss(label_smoothing=0.1) - model = model.to(DEVICE) - - # Optimizes given model/function using TorchDynamo and specified backend - torch.compile(model) - - logger.info("training") - wandb.init( - project="ASL-project", - config={ - "learning_rate": 0.01, - "architecture": "Test Model", - "dataset": "Google ASL Landmarks", - "epochs": 12, - }, - ) - - wandb.watch(model) - try: - train( - model=arg.model, - optim=optimizer, - loss_func=criterion, - n_epochs=arg.epochs, - batch=arg.batch, - device=DEVICE, - ) - logger.success(f"Training completed: {arg.epochs} epochs on {DEVICE}.") - - except Exception as error: - logger.exception(f"Training failed due to an {error}.") - - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/linguify_yb/src/utils/__init__.py b/linguify_yb/src/utils/__init__.py deleted file mode 100644 index 8da12c8d..00000000 --- a/linguify_yb/src/utils/__init__.py +++ /dev/null @@ -1,74 +0,0 @@ -import os -import random -import argparse - -import numpy as np -import torch - -import torch_xla.core.xla_model as xm - - -def set_seed(seed: int = 42) -> None: - 
np.random.seed(seed) - random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed(seed) - # When running on the CuDNN backend, two further options must be set - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - # Set a fixed value for the hash seed - os.environ["PYTHONHASHSEED"] = str(seed) - - -def get_device_strategy(tpu=False): - if tpu: - device = xm.xla_device() - else: - device = torch.device("cuda" if torch.cuda.is_availabe() else "cpu") - return device - - -def parse_args(): - """ - Parse arguments given to the script. - - Returns: - The parsed argument object. - """ - parser = argparse.ArgumentParser( - description="Run distributed data-parallel training and log with wandb." - ) - - parser.add_argument( - "--model", - default="asl_transfomer", - type=str, - metavar="N", - help="name of model to train", - ) - - parser.add_argument( - "--epochs", - default=2, - type=int, - metavar="N", - help="number of total epochs to run", - ) - parser.add_argument( - "--batch", - default=32, - type=int, - metavar="N", - help="number of data samples in one batch", - ) - parser.add_argument( - "--tpu", - default=False, - type=bool, - metavar="N", - help="Train on TPU Device", - ) - parser.add_argument('--resume_checkpoint', type=bool, help='Path to the checkpoint for resuming training') - - args = parser.parse_args() - return args diff --git a/linguify_yb/src/utils/benchmark,py b/linguify_yb/src/utils/benchmark,py deleted file mode 100644 index e9e4e5b9..00000000 --- a/linguify_yb/src/utils/benchmark,py +++ /dev/null @@ -1,50 +0,0 @@ -"""doc -""" - -import profile_macs -import torch.nn as nn - - -Byte = 8 -KiB = 1024 * Byte -MiB = 1024 * KiB -GiB = 1024 * MiB - - -def get_model_macs(model, inputs) -> int: - return profile_macs(model, inputs) - - -def get_model_sparsity(model: nn.Module) -> float: - """ - calculate the sparsity of the given model - sparsity = #zeros / #elements = 1 - #nonzeros / #elements - """ - num_nonzeros, num_elements = 0, 0 - for param in model.parameters(): - num_nonzeros += param.count_nonzero() - num_elements += param.numel() - return 1 - float(num_nonzeros) / num_elements - - -def get_num_parameters(model: nn.Module, count_nonzero_only=False) -> int: - """ - calculate the total number of parameters of model - :param count_nonzero_only: only count nonzero weights - """ - num_counted_elements = 0 - for param in model.parameters(): - if count_nonzero_only: - num_counted_elements += param.count_nonzero() - else: - num_counted_elements += param.numel() - return num_counted_elements - - -def get_model_size(model: nn.Module, data_width=32, count_nonzero_only=False) -> int: - """ - calculate the model size in bits - :param data_width: #bits per element - :param count_nonzero_only: only count nonzero weights - """ - return get_num_parameters(model, count_nonzero_only) * data_width diff --git a/linguify_yb/tests/test_data_ingestion.py b/linguify_yb/tests/test_data_ingestion.py deleted file mode 100644 index e69de29b..00000000 diff --git a/linguify_yb/tests/test_model.py b/linguify_yb/tests/test_model.py deleted file mode 100644 index e69de29b..00000000 diff --git a/linguify_yb/tests/test_pipeline.py b/linguify_yb/tests/test_pipeline.py deleted file mode 100644 index e69de29b..00000000 From b6e5e95486ae9c70adc805f100af7b64102108e6 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 02:46:12 +0100 Subject: [PATCH 06/16] [add] added units test --- linguify_yb/src/tests/test_data_ingestion.py | 8 ++++ 
linguify_yb/src/tests/test_model.py | 39 ++++++++++++++++++++ linguify_yb/src/tests/test_pipeline.py | 0 3 files changed, 47 insertions(+) create mode 100644 linguify_yb/src/tests/test_data_ingestion.py create mode 100644 linguify_yb/src/tests/test_model.py create mode 100644 linguify_yb/src/tests/test_pipeline.py diff --git a/linguify_yb/src/tests/test_data_ingestion.py b/linguify_yb/src/tests/test_data_ingestion.py new file mode 100644 index 00000000..b4e86067 --- /dev/null +++ b/linguify_yb/src/tests/test_data_ingestion.py @@ -0,0 +1,8 @@ +"doc" + +import torch +import pytest + + +def test_data_columns(): + pass diff --git a/linguify_yb/src/tests/test_model.py b/linguify_yb/src/tests/test_model.py new file mode 100644 index 00000000..c1206d1c --- /dev/null +++ b/linguify_yb/src/tests/test_model.py @@ -0,0 +1,39 @@ +"""doc +""" + +import pytest + +import torch +from torch.utils.data import DataLoader +from src.models.baseline_transformer import ASLTransformer + +@pytest.fixture +def baseline_model(): + """_summary_""" + model = ASLTransformer() + return model + + +@pytest.mark.parametrize( + "inputs_x, target_y", [(torch.randn(128, 345), torch.randint(0, 60, (64,)))] +) +def test_baseline_transformer_output_shape(baseline_model, inputs_x, target_y): + """_summary_""" + output = baseline_model(inputs_x, target_y) + # Assert + expected_output_shape = (64, 62) + assert output.shape == expected_output_shape + +@pytest.mark.parametrize("inputs_x", [(torch.randn(128,345)), (torch.randn(128,345))]) +def test_baseline_transformer_generate_out( + baseline_model, + inputs_x, +): + """_summary_""" + output = baseline_model.generate(inputs_x) + # Assert + expected_output_len = 64 + assert len(output) == expected_output_len + +@pytest.mark.parametrize("input_shape", [(batch_size, input_dim) for batch_size in [1, 2, 5] for input_dim in [32, 64, 128]]) +def \ No newline at end of file diff --git a/linguify_yb/src/tests/test_pipeline.py b/linguify_yb/src/tests/test_pipeline.py new file mode 100644 index 00000000..e69de29b From bd8a07549da2c67219511f955d3616d9f5526eaa Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 02:46:49 +0100 Subject: [PATCH 07/16] [add] documentaiion for baseline transformer --- .../src/models/baseline_transformer.py | 236 ++++++++++++------ 1 file changed, 161 insertions(+), 75 deletions(-) diff --git a/linguify_yb/src/models/baseline_transformer.py b/linguify_yb/src/models/baseline_transformer.py index 02d76856..2ab43725 100644 --- a/linguify_yb/src/models/baseline_transformer.py +++ b/linguify_yb/src/models/baseline_transformer.py @@ -1,26 +1,65 @@ -"""doc """ +Baseline Transformer Module +This module contains the implementation of a Transformer model for sign language tasks. + +Classes: +- TokenEmbedding: Create embedding for the target seqeunce +- LandmarkEmbedding: Create embedding for the source(frames)seqeunce +- Encoder: Implements the transformer encoder stack. +- Decoder: Implements the transformer decoder stack. +- Transformer: The main transformer model class with methods for training and inference. 
+ +Methods: +- Transformer.generate: Perform inference on a new sequence +""" import torch from torch import nn class TokenEmbedding(nn.Module): - def __init__(self, num_vocab=1000, maxlen=100, num_hid=200): + """_summary_""" + + def __init__(self, num_vocab, maxlen, embedding_dim): + """_summary_ + + Parameters + ---------- + num_vocab : _type_ + _description_ + maxlen : _type_ + _description_ + embedding_dim : _type_ + _description_ + """ super().__init__() - self.emb = nn.Embedding(num_vocab, num_hid) - self.pos_emb = nn.Embedding(maxlen, num_hid) + self.token_embed_layer = nn.Embedding(num_vocab, embedding_dim) + self.postion_embed_layer = nn.Embedding(maxlen, embedding_dim) def forward(self, x): + """_summary_ + + Parameters + ---------- + x : _type_ + _description_ + + Returns + ------- + _type_ + _description_ + """ maxlen = x.size(-1) - x = self.emb(x) + x = self.token_embed_layer(x) positions = torch.arange(0, maxlen).to(x.device) - positions = self.pos_emb(positions) + positions = self.postion_embed_layer(positions) return x + positions class LandmarkEmbedding(nn.Module): - def __init__(self, num_hid=64, maxlen=100): + """_summary_""" + + def __init__(self, embedding_dim): super().__init__() # Calculate the padding for "same" padding padding = (11 - 1) // 2 @@ -37,7 +76,7 @@ def __init__(self, num_hid=64, maxlen=100): ) # Output embedding layer - self.embedding_layer = nn.Linear(256, num_hid) + self.embedding_layer = nn.Linear(256, embedding_dim) def forward(self, x): # Input x should have shape (batch_size, input_size) @@ -58,141 +97,189 @@ def forward(self, x): class TransformerEncoder(nn.Module): + """_summary_""" + def __init__( self, - embed_dim, + embedding_dim, num_heads, feed_forward_dim, rate=0.1, ): + """_summary_ + + Parameters + ---------- + embedding_dim : _type_ + _description_ + num_heads : _type_ + _description_ + feed_forward_dim : _type_ + _description_ + rate : float, optional + _description_, by default 0.1 + """ super().__init__() - self.att = nn.MultiheadAttention(embed_dim, num_heads) + self.multi_attention = nn.MultiheadAttention(embedding_dim, num_heads) self.ffn = nn.Sequential( - nn.Linear(embed_dim, feed_forward_dim), + nn.Linear(embedding_dim, feed_forward_dim), nn.ReLU(), - nn.Linear(feed_forward_dim, embed_dim), + nn.Linear(feed_forward_dim, embedding_dim), ) - self.layernorm1 = nn.LayerNorm(embed_dim, eps=1e-6) - self.layernorm2 = nn.LayerNorm(embed_dim, eps=1e-6) + self.layernorm1 = nn.LayerNorm(embedding_dim, eps=1e-6) + self.layernorm2 = nn.LayerNorm(embedding_dim, eps=1e-6) self.dropout1 = nn.Dropout(rate) self.dropout2 = nn.Dropout(rate) - def forward(self, inputs): - attn_out, _ = self.att(inputs, inputs, inputs) - attn_out = self.dropout1(attn_out) - out1 = self.layernorm1(inputs + attn_out) + def forward(self, inputs_x): + multi_attention_out, _ = self.multi_attention(inputs_x, inputs_x, inputs_x) + multi_attention_out = self.dropout1(multi_attention_out) + out1 = self.layernorm1(inputs_x + multi_attention_out) ffn_out = self.ffn(out1) ffn_out = self.dropout2(ffn_out) x = self.layernorm2(out1 + ffn_out) - print(f"endocder{x.shape}") return x class TransformerDecoder(nn.Module): - def __init__(self, embed_dim, num_heads, feed_forward_dim, dropout_rate=0.1): + """_summary_""" + + def __init__(self, embedding_dim, num_heads, feed_forward_dim, dropout_rate=0.1): super().__init__() - self.layernorm1 = nn.LayerNorm(embed_dim, eps=1e-6) - self.layernorm2 = nn.LayerNorm(embed_dim, eps=1e-6) - self.layernorm3 = nn.LayerNorm(embed_dim, eps=1e-6) - 
self.self_att = nn.MultiheadAttention(embed_dim, num_heads) - self.enc_att = nn.MultiheadAttention(embed_dim, num_heads) - self.self_dropout = nn.Dropout(0.5) - self.enc_dropout = nn.Dropout(0.1) - self.ffn_dropout = nn.Dropout(0.1) + self.num_heads_ = num_heads + self.layernorm1 = nn.LayerNorm(embedding_dim, eps=1e-6) + self.layernorm2 = nn.LayerNorm(embedding_dim, eps=1e-6) + self.layernorm3 = nn.LayerNorm(embedding_dim, eps=1e-6) + self.decoder_multi_attention = nn.MultiheadAttention(embedding_dim, num_heads) + self.encoder_multi_attention = nn.MultiheadAttention(embedding_dim, num_heads) + self.decoder_dropout = nn.Dropout(0.5) + self.encoder_dropout = nn.Dropout(dropout_rate) + self.ffn_dropout = nn.Dropout(dropout_rate) self.ffn = nn.Sequential( - nn.Linear(embed_dim, feed_forward_dim), + nn.Linear(embedding_dim, feed_forward_dim), nn.ReLU(), - nn.Linear(feed_forward_dim, embed_dim), + nn.Linear(feed_forward_dim, embedding_dim), ) - def causal_attention_mask( - self, sequence_length, batch_size=1, num_heads=4, device="cpu" - ): + def _causal_attention_mask(self, sequence_length, batch_size=1, device=None): mask = torch.triu(torch.ones(sequence_length, sequence_length), diagonal=1).to( device ) mask = mask.unsqueeze(0).expand( - batch_size * num_heads, sequence_length, sequence_length + batch_size * self.num_heads_, sequence_length, sequence_length ) return mask def forward( self, - enc_out, + encoder_out, src_target_, ): input_shape = src_target_.size() batch_size = 1 # input_shape[0] seq_len = input_shape[0] - mask = self.causal_attention_mask(seq_len, batch_size=batch_size) - target_att, _ = self.self_att( - src_target_, src_target_, src_target_, attn_mask=mask + x_device = src_target_.device + + # Mask + causal_mask = self._causal_attention_mask( + sequence_length=seq_len, batch_size=batch_size, device=x_device ) - target_norm = self.layernorm1(src_target_ + self.self_dropout(target_att)) - enc_out, _ = self.enc_att(target_norm, enc_out, enc_out) - enc_out_norm = self.layernorm2(enc_out + self.enc_dropout(enc_out)) + target_att, _ = self.decoder_multi_attention( + src_target_, src_target_, src_target_, attn_mask=causal_mask + ) + target_norm_out = self.layernorm1( + src_target_ + self.decoder_dropout(target_att) + ) + + encoder_out, _ = self.encoder_multi_attention( + target_norm_out, encoder_out, encoder_out + ) + enc_out_norm = self.layernorm2(encoder_out + self.encoder_dropout(encoder_out)) ffn_out = self.ffn(enc_out_norm) ffn_out_norm = self.layernorm3(enc_out_norm + self.ffn_dropout(ffn_out)) - print(f"decoder - {ffn_out_norm.shape}") return ffn_out_norm -class NTransformer(nn.Module): +class ASLTransformer(nn.Module): def __init__( self, - num_hid=64, - num_head=8, + num_hidden_dim=64, + multi_num_head=8, num_feed_forward=128, - source_maxlen=100, - target_maxlen=100, + target_maxlen=64, num_layers_enc=4, num_layers_dec=4, ): + """_summary_ + + Parameters + ---------- + num_hidden_dim : int, optional + _description_, by default 64 + multi_num_head : int, optional + _description_, by default 8 + num_feed_forward : int, optional + _description_, by default 128 + target_maxlen : int, optional + _description_, by default 64 + num_layers_enc : int, optional + _description_, by default 4 + num_layers_dec : int, optional + _description_, by default 4 + """ super().__init__() self.num_layers_enc = num_layers_enc self.num_layers_dec = num_layers_dec self.target_maxlen = target_maxlen self.num_classes = 62 - self.enc_input = LandmarkEmbedding(num_hid=num_hid, maxlen=source_maxlen) - 
self.dec_input = TokenEmbedding( - num_vocab=self.num_classes, maxlen=target_maxlen + self.encoder_input = LandmarkEmbedding(embedding_dim=num_hidden_dim) + self.decoder_input = TokenEmbedding( + num_vocab=self.num_classes, + embedding_dim=num_hidden_dim, + maxlen=target_maxlen, ) self.encoder = nn.Sequential( - self.enc_input, + self.encoder_input, *[ - TransformerEncoder(num_hid, num_head, num_feed_forward) + TransformerEncoder( + embedding_dim=num_hidden_dim, + num_heads=multi_num_head, + feed_forward_dim=num_feed_forward, + ) for _ in range(num_layers_enc) ], ) for i in range(num_layers_dec): self.add_module( - f"dec_layer_{i}", - TransformerDecoder(num_hid, num_head, num_feed_forward), + f"decoder_layer_{i}", + TransformerDecoder( + embedding_dim=num_hidden_dim, + num_heads=multi_num_head, + feed_forward_dim=num_feed_forward, + ), ) - self.classifier = nn.Linear(num_hid, self.num_classes) + self.classifier = nn.Linear( + in_features=num_hidden_dim, out_features=self.num_classes + ) def forward(self, source, target): - x = self.encoder(source) - y = self.decoder_run(x, target) - print(y.shape) - return self.classifier(y) - - def decoder_run(self, enc_out, target): - print(f"before emb {target.shape}") - y = self.dec_input(target) - print(f"after emb {y.shape}") + encoder_out = self.encoder(source) + transformer_output = self._decoder_run(encoder_out, target) + return self.classifier(transformer_output) + def _decoder_run(self, enc_out, target): + decoder_out = self.decoder_input(target) for i in range(self.num_layers_dec): - y = getattr(self, f"dec_layer_{i}")(enc_out, y) - return y + decoder_out = getattr(self, f"decoder_layer_{i}")(enc_out, decoder_out) + return decoder_out def generate(self, source, target_start_token_idx=60): """Performs inference over one batch of inputs using greedy decoding @@ -201,7 +288,7 @@ def generate(self, source, target_start_token_idx=60): ---------- source : _type_ _description_ - target_start_token_idx : _type_ + target_start_token_idx : int _description_ Returns @@ -209,18 +296,17 @@ def generate(self, source, target_start_token_idx=60): _type_ _description_ """ - bs = source.size(0) - enc = self.encoder(source) - dec_input = ( + encoder_out = self.encoder(source) + decoder_input = ( torch.ones((1), dtype=torch.long).to(source.device) * target_start_token_idx ) dec_logits = [] - for i in range(self.target_maxlen - 1): - dec_out = self.decoder_run(enc, dec_input) - logits = self.classifier(dec_out) + for _ in range(self.target_maxlen - 1): + decoder_out = self._decoder_run(encoder_out, decoder_input) + logits = self.classifier(decoder_out) logits = torch.argmax(logits, dim=-1, keepdim=True) last_logit = logits[-1] dec_logits.append(last_logit) - dec_input = torch.cat([dec_input, last_logit], dim=-1) - return dec_input + decoder_input = torch.cat([decoder_input, last_logit], dim=-1) + return decoder_input From 9a9915b77b80a33feb995fac27e23cfccfe321c9 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 02:47:20 +0100 Subject: [PATCH 08/16] [add] added model benchmark script --- linguify_yb/src/benchmark.py | 58 ++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 linguify_yb/src/benchmark.py diff --git a/linguify_yb/src/benchmark.py b/linguify_yb/src/benchmark.py new file mode 100644 index 00000000..4c710533 --- /dev/null +++ b/linguify_yb/src/benchmark.py @@ -0,0 +1,58 @@ +"""doc +""" +from torchprofile import profile_macs +from torch import nn + + +Byte = 8 +KiB = 1024 * Byte +MiB = 1024 * KiB +GiB = 
1024 * MiB + + +class BenchMarker: + def __init__(self) -> None: + pass + + def get_model_macs(self, model, inputs=None) -> int: + return profile_macs(model, inputs) + + def get_model_sparsity(self, model: nn.Module) -> float: + """ + calculate the sparsity of the given model + sparsity = #zeros / #elements = 1 - #nonzeros / #elements + """ + num_nonzeros, num_elements = 0, 0 + for param in model.parameters(): + num_nonzeros += param.count_nonzero() + num_elements += param.numel() + return 1 - float(num_nonzeros) / num_elements + + def get_num_parameters(self, model: nn.Module, count_nonzero_only=False) -> int: + """ + calculate the total number of parameters of model + :param count_nonzero_only: only count nonzero weights + """ + num_counted_elements = 0 + for param in model.parameters(): + if count_nonzero_only: + num_counted_elements += param.count_nonzero() + else: + num_counted_elements += param.numel() + return num_counted_elements + + def get_model_size( + self, model: nn.Module, data_width=32, count_nonzero_only=False + ) -> int: + """ + calculate the model size in bits + :param data_width: #bits per element + :param count_nonzero_only: only count nonzero weights + """ + return self.get_num_parameters(model, count_nonzero_only) * data_width + + def runner(self, model): + model_macs = self.get_model_macs(model) + model_sparsity = self.get_model_sparsity(model) + model_num_params = self.get_num_parameters(model) + model_size = self.get_model_size(model) From 321987f4395d45880375721a541015d064374709 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 02:49:21 +0100 Subject: [PATCH 09/16] [add] updates readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 12455609..320d59a3 100644 --- a/README.md +++ b/README.md @@ -56,9 +56,9 @@ $ source $(poetry env info --path)/bin/activate Here's a glimpse of the exciting features we plan to implement in the coming weeks: -- [x] Add project's documentation (you are reading it now), create issues and milestones, setup document's stub, suggest - page layout and styling. 
+- [x] Add project's documentation - [] Develop a Proof of Concept System +- [] Deployment of Proof of Concept System ## Acknowledgments From 85d3d02ba54fb85fa5e754ef04493531b78c886d Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 03:28:08 +0100 Subject: [PATCH 10/16] [add] test workflow --- .github/workflows/run_units_test.yml | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/run_units_test.yml diff --git a/.github/workflows/run_units_test.yml b/.github/workflows/run_units_test.yml new file mode 100644 index 00000000..43fa0cb2 --- /dev/null +++ b/.github/workflows/run_units_test.yml @@ -0,0 +1,29 @@ +name: Units Tests + +on: + push: + branches: + - main + +jobs: + units-test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Use Node.js 16 + uses: actions/setup-node@v3 + with: + node-version: 16 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.10 + + - name: Pytest + run: | + cd linguify + pytest \ No newline at end of file From b337d33195bcf07ff0c99bce7ad77640774d8739 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 06:08:27 +0100 Subject: [PATCH 11/16] [add] mics --- linguify_yb/development/code_dev.ipynb | 161 ++++++ linguify_yb/development/data_dev.ipynb | 39 -- linguify_yb/development/dev.ipynb | 112 +--- linguify_yb/development/trans_dev.ipynb | 697 ++++++++++++++++++++++++ 4 files changed, 876 insertions(+), 133 deletions(-) create mode 100644 linguify_yb/development/code_dev.ipynb create mode 100644 linguify_yb/development/trans_dev.ipynb diff --git a/linguify_yb/development/code_dev.ipynb b/linguify_yb/development/code_dev.ipynb new file mode 100644 index 00000000..a41f02c2 --- /dev/null +++ b/linguify_yb/development/code_dev.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def resize_pad(x):\n", + " if x.shape[0] < FRAME_LEN:\n", + " x = F.pad(x, (0, 0, 0, FRAME_LEN - x.shape[0], 0, 0))\n", + " else:\n", + " x = x.unsqueeze(0) # Add batch and channel dimensions\n", + " x = torch.nn.functional.interpolate(\n", + " x, size=(FRAME_LEN, x.shape[1]), mode=\"bilinear\", align_corners=False\n", + " ).squeeze(0)\n", + "\n", + " return x\n", + "\n", + "\n", + "def frames_preprocess(x):\n", + " x = torch.tensor(x)\n", + " rhand = x[:, RHAND_IDX]\n", + " lhand = x[:, LHAND_IDX]\n", + " rpose = x[:, RPOSE_IDX]\n", + " lpose = x[:, LPOSE_IDX]\n", + "\n", + " rnan_idx = torch.any(torch.isnan(rhand), dim=1)\n", + " lnan_idx = torch.any(torch.isnan(lhand), dim=1)\n", + "\n", + " rnans = torch.sum(rnan_idx)\n", + " lnans = torch.sum(lnan_idx)\n", + "\n", + " if rnans > lnans:\n", + " hand = lhand\n", + " pose = lpose\n", + "\n", + " hand_x = hand[:, 0 * (len(LHAND_IDX) // 3) : 1 * (len(LHAND_IDX) // 3)]\n", + " hand_y = hand[:, 1 * (len(LHAND_IDX) // 3) : 2 * (len(LHAND_IDX) // 3)]\n", + " hand_z = hand[:, 2 * (len(LHAND_IDX) // 3) : 3 * (len(LHAND_IDX) // 3)]\n", + " hand = torch.cat([1 - hand_x, hand_y, hand_z], dim=1)\n", + "\n", + " pose_x = pose[:, 0 * (len(LPOSE_IDX) // 3) : 1 * (len(LPOSE_IDX) // 3)]\n", + " pose_y = pose[:, 1 * (len(LPOSE_IDX) // 3) : 2 * (len(LPOSE_IDX) // 3)]\n", + " pose_z = pose[:, 2 * (len(LPOSE_IDX) // 3) : 3 * (len(LPOSE_IDX) // 3)]\n", + " pose = torch.cat([1 - pose_x, pose_y, pose_z], dim=1)\n", + " else:\n", + " hand = rhand\n", + " pose = rpose\n", + "\n", + " hand_x = hand[:, 0 * 
(len(LHAND_IDX) // 3) : 1 * (len(LHAND_IDX) // 3)]\n", + " hand_y = hand[:, 1 * (len(LHAND_IDX) // 3) : 2 * (len(LHAND_IDX) // 3)]\n", + " hand_z = hand[:, 2 * (len(LHAND_IDX) // 3) : 3 * (len(LHAND_IDX) // 3)]\n", + " hand = torch.cat(\n", + " [hand_x.unsqueeze(-1), hand_y.unsqueeze(-1), hand_z.unsqueeze(-1)], dim=-1\n", + " )\n", + "\n", + " mean = torch.mean(hand, dim=1).unsqueeze(1)\n", + " std = torch.std(hand, dim=1).unsqueeze(1)\n", + " hand = (hand - mean) / std\n", + "\n", + " pose_x = pose[:, 0 * (len(LPOSE_IDX) // 3) : 1 * (len(LPOSE_IDX) // 3)]\n", + " pose_y = pose[:, 1 * (len(LPOSE_IDX) // 3) : 2 * (len(LPOSE_IDX) // 3)]\n", + " pose_z = pose[:, 2 * (len(LPOSE_IDX) // 3) : 3 * (len(LPOSE_IDX) // 3)]\n", + " pose = torch.cat(\n", + " [pose_x.unsqueeze(-1), pose_y.unsqueeze(-1), pose_z.unsqueeze(-1)], dim=-1\n", + " )\n", + "\n", + " x = torch.cat([hand, pose], dim=1)\n", + " # print(f\"befor re{x.shape}\")\n", + " x = resize_pad(x)\n", + " # print(f\"after re{x.shape}\")\n", + " x = torch.where(torch.isnan(x), torch.zeros_like(x), x)\n", + " # print(x.shape)\n", + "\n", + " #! CRITICAL Debug\n", + " # x = x.view(FRAME_LEN, len(LHAND_IDX) + len(LPOSE_IDX))\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"doc\n", + "\"\"\"\n", + "\n", + "FRAME_LEN = 128\n", + "\n", + "LPOSE = [13, 15, 17, 19, 21]\n", + "RPOSE = [14, 16, 18, 20, 22]\n", + "POSE = LPOSE + RPOSE\n", + "\n", + "X = (\n", + " [f\"x_right_hand_{i}\" for i in range(21)]\n", + " + [f\"x_left_hand_{i}\" for i in range(21)]\n", + " + [f\"x_pose_{i}\" for i in POSE]\n", + ")\n", + "Y = (\n", + " [f\"y_right_hand_{i}\" for i in range(21)]\n", + " + [f\"y_left_hand_{i}\" for i in range(21)]\n", + " + [f\"y_pose_{i}\" for i in POSE]\n", + ")\n", + "Z = (\n", + " [f\"z_right_hand_{i}\" for i in range(21)]\n", + " + [f\"z_left_hand_{i}\" for i in range(21)]\n", + " + [f\"z_pose_{i}\" for i in POSE]\n", + ")\n", + "\n", + "FEATURE_COLUMNS = X + Y + Z\n", + "\n", + "X_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if \"x_\" in col]\n", + "Y_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if \"y_\" in col]\n", + "Z_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if \"z_\" in col]\n", + "\n", + "RHAND_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if \"right\" in col]\n", + "LHAND_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if \"left\" in col]\n", + "RPOSE_IDX = [\n", + " i\n", + " for i, col in enumerate(FEATURE_COLUMNS)\n", + " if \"pose\" in col and int(col[-2:]) in RPOSE\n", + "]\n", + "LPOSE_IDX = [\n", + " i\n", + " for i, col in enumerate(FEATURE_COLUMNS)\n", + " if \"pose\" in col and int(col[-2:]) in LPOSE\n", + "]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/linguify_yb/development/data_dev.ipynb b/linguify_yb/development/data_dev.ipynb index 2fd7178f..cade00e3 100644 --- a/linguify_yb/development/data_dev.ipynb +++ b/linguify_yb/development/data_dev.ipynb @@ -13,45 +13,6 @@ "import pyarrow.parquet as pq" ] }, - { - 
"cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total Space: 1399.98 GB\n", - "Used Space: 455.25 GB\n", - "Free Space: 944.73 GB\n", - "Percentage Used: 32.50%\n" - ] - } - ], - "source": [ - "import psutil\n", - "\n", - "def get_storage_space():\n", - " # Get disk usage statistics\n", - " disk_usage = psutil.disk_usage('/')\n", - "\n", - " # Extract relevant information\n", - " total_space = disk_usage.total # Total storage space\n", - " used_space = disk_usage.used # Used storage space\n", - " free_space = disk_usage.free # Free storage space\n", - " percent_used = disk_usage.percent # Percentage of used space\n", - "\n", - " # Print or return the information\n", - " print(f\"Total Space: {total_space / (1024 ** 3):.2f} GB\")\n", - " print(f\"Used Space: {used_space / (1024 ** 3):.2f} GB\")\n", - " print(f\"Free Space: {free_space / (1024 ** 3):.2f} GB\")\n", - " print(f\"Percentage Used: {percent_used:.2f}%\")\n", - "\n", - "# Call the function to get storage space information\n", - "get_storage_space()\n" - ] - }, { "cell_type": "code", "execution_count": 16, diff --git a/linguify_yb/development/dev.ipynb b/linguify_yb/development/dev.ipynb index 273dbbb5..9224fe47 100644 --- a/linguify_yb/development/dev.ipynb +++ b/linguify_yb/development/dev.ipynb @@ -4,25 +4,6 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Note: you may need to restart the kernel to use updated packages.\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "%pip install torch --quiet\n", - "%pip install torchprofile --quiet" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, "outputs": [ { "name": "stdout", @@ -36,8 +17,9 @@ "import random\n", "import os\n", "import torch\n", + "import torch.nn as nn\n", "import numpy as np\n", - "\n", + "from torchprofile import profile_macs\n", "\n", "def set_seed(seed: int = 42) -> None:\n", " np.random.seed(seed)\n", @@ -56,85 +38,34 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "from torch import nn\n", - "import os\n", - "#from torchprofile import profile_macs" - ] - }, - { - "cell_type": "code", - "execution_count": 21, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "logits = torch.rand(64,64)" + "ans = [(torch.randn(batch_size, 128, 345), torch.randint(0, 60, (batch_size, 64)), batch_size) \n", + " for batch_size in [1,2,4,8]]" ] }, { "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "dec_logits = []\n", - "dec_input = (torch.ones((1), dtype=torch.long)* 60)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "logits = torch.argmax(logits, dim=-1, keepdim=True) \n", - "last_logit = logits[:, -1]\n", - "dec_logits.append(last_logit)\n", - "dec_input = torch.cat([dec_input, last_logit], dim=-1)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, + "execution_count": 9, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "torch.Size([64])" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "last_logit.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - 
"text/plain": [ - "torch.Size([65])" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1, 128, 345]) torch.Size([1, 64]) 1\n", + "torch.Size([2, 128, 345]) torch.Size([2, 64]) 2\n", + "torch.Size([4, 128, 345]) torch.Size([4, 64]) 4\n", + "torch.Size([8, 128, 345]) torch.Size([8, 64]) 8\n" + ] } ], "source": [ - "dec_input.shape" + "for b in ans:\n", + " x, y , bs = b\n", + " print(x.shape, y.shape,bs)" ] }, { @@ -142,14 +73,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "for idx in preds[i, :]:\n", - " prediction += self.idx_to_char[idx]\n", - " if idx == 60:\n", - " break\n", - "print(f\"target: {target_text.replace('-','')}\")\n", - "print(f\"prediction: {prediction}\\n\")" - ] + "source": [] } ], "metadata": { diff --git a/linguify_yb/development/trans_dev.ipynb b/linguify_yb/development/trans_dev.ipynb new file mode 100644 index 00000000..e2c673c6 --- /dev/null +++ b/linguify_yb/development/trans_dev.ipynb @@ -0,0 +1,697 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random seed set as 42\n" + ] + } + ], + "source": [ + "import random\n", + "import os\n", + "import torch\n", + "import torch.nn as nn\n", + "import numpy as np\n", + "from loguru import logger\n", + "from torchprofile import profile_macs\n", + "\n", + "def set_seed(seed: int = 42) -> None:\n", + " np.random.seed(seed)\n", + " random.seed(seed)\n", + " torch.manual_seed(seed)\n", + " torch.cuda.manual_seed(seed)\n", + " # When running on the CuDNN backend, two further options must be set\n", + " torch.backends.cudnn.deterministic = True\n", + " torch.backends.cudnn.benchmark = False\n", + " # Set a fixed value for the hash seed\n", + " os.environ[\"PYTHONHASHSEED\"] = str(seed)\n", + " print(f\"Random seed set as {seed}\")\n", + "\n", + "set_seed()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Baseline Transformer Module\n", + "\n", + "This module contains the implementation of a Transformer model for sign language tasks.\n", + "\n", + "Classes:\n", + "- TokenEmbedding: Create embedding for the target seqeunce\n", + "- LandmarkEmbedding: Create embedding for the source(frames)seqeunce\n", + "- Encoder: Implements the transformer encoder stack.\n", + "- Decoder: Implements the transformer decoder stack.\n", + "- Transformer: The main transformer model class with methods for training and inference.\n", + "\n", + "Methods:\n", + "- Transformer.generate: Perform inference on a new sequence\n", + "\"\"\"\n", + "import torch\n", + "from torch import nn\n", + "\n", + "\n", + "class TokenEmbedding(nn.Module):\n", + " \"\"\"Embed the tokens with postion encoding\"\"\"\n", + "\n", + " def __init__(self, num_vocab, maxlen, embedding_dim):\n", + " \"\"\"_summary_\n", + "\n", + " Parameters\n", + " ----------\n", + " num_vocab : int\n", + " number of vocabulary\n", + " maxlen : int\n", + " maximuin length of sequence\n", + " embedding_dim : int\n", + " embedding output dimension\n", + " \"\"\"\n", + " super().__init__()\n", + " self.token_embed_layer = nn.Embedding(num_vocab, embedding_dim)\n", + " self.postion_embed_layer = nn.Embedding(maxlen, embedding_dim)\n", + "\n", + " def forward(self, x):\n", + " \"\"\"_summary_\n", + "\n", + " Parameters\n", + " ----------\n", + " x : tensors\n", + " 
_description_\n", + "\n", + " Returns\n", + " -------\n", + " tensors\n", + " _description_\n", + " \"\"\"\n", + " maxlen = x.size(-1)\n", + " x = self.token_embed_layer(x)\n", + " positions = torch.arange(0, maxlen).to(x.device)\n", + " positions = self.postion_embed_layer(positions)\n", + " return x + positions\n", + "\n", + "\n", + "class LandmarkEmbedding(nn.Module):\n", + " \"\"\"_summary_\"\"\"\n", + "\n", + " def __init__(self, embedding_dim):\n", + " super().__init__()\n", + " # Calculate the padding for \"same\" padding\n", + " padding = (11 - 1) // 2\n", + "\n", + " # Define three 1D convolutional layers with ReLU activation and stride 2\n", + " self.conv1 = nn.Conv1d(\n", + " in_channels=1, out_channels=64, kernel_size=11, stride=2, padding=padding\n", + " )\n", + " self.conv2 = nn.Conv1d(\n", + " in_channels=64, out_channels=128, kernel_size=11, stride=2, padding=padding\n", + " )\n", + " self.conv3 = nn.Conv1d(\n", + " in_channels=128, out_channels=256, kernel_size=11, stride=2, padding=padding\n", + " )\n", + "\n", + " # Output embedding layer\n", + " self.embedding_layer = nn.Linear(256, embedding_dim)\n", + "\n", + " def forward(self, x):\n", + " # Input x should have shape (batch_size, input_size, input_dim)\n", + " x = x.unsqueeze(1) # Add a channel dimension for 1D convolution\n", + "\n", + " # Apply convolutional layers with ReLU activation and stride 2\n", + " x = torch.relu(self.conv1(x))\n", + " x = torch.relu(self.conv2(x))\n", + " x = torch.relu(self.conv3(x))\n", + "\n", + " # Global average pooling to reduce spatial dimensions\n", + " x = torch.mean(x, dim=2)\n", + "\n", + " # Apply the linear embedding layer\n", + " x = self.embedding_layer(x)\n", + "\n", + " return x\n", + "\n", + "\n", + "class TransformerEncoder(nn.Module):\n", + " \"\"\"_summary_\"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " embedding_dim,\n", + " num_heads,\n", + " feed_forward_dim,\n", + " rate=0.1,\n", + " ):\n", + " \"\"\"_summary_\n", + "\n", + " Parameters\n", + " ----------\n", + " embedding_dim : _type_\n", + " _description_\n", + " num_heads : _type_\n", + " _description_\n", + " feed_forward_dim : _type_\n", + " _description_\n", + " rate : float, optional\n", + " _description_, by default 0.1\n", + " \"\"\"\n", + " super().__init__()\n", + " self.multi_attention = nn.MultiheadAttention(embedding_dim, num_heads)\n", + " self.ffn = nn.Sequential(\n", + " nn.Linear(embedding_dim, feed_forward_dim),\n", + " nn.ReLU(),\n", + " nn.Linear(feed_forward_dim, embedding_dim),\n", + " )\n", + "\n", + " self.layernorm1 = nn.LayerNorm(embedding_dim, eps=1e-6)\n", + " self.layernorm2 = nn.LayerNorm(embedding_dim, eps=1e-6)\n", + " self.dropout1 = nn.Dropout(rate)\n", + " self.dropout2 = nn.Dropout(rate)\n", + "\n", + " def forward(self, inputs_x):\n", + " multi_attention_out, _ = self.multi_attention(inputs_x, inputs_x, inputs_x)\n", + " multi_attention_out = self.dropout1(multi_attention_out)\n", + " out1 = self.layernorm1(inputs_x + multi_attention_out)\n", + "\n", + " ffn_out = self.ffn(out1)\n", + " ffn_out = self.dropout2(ffn_out)\n", + " x = self.layernorm2(out1 + ffn_out)\n", + " return x\n", + "\n", + "\n", + "class TransformerDecoder(nn.Module):\n", + " \"\"\"_summary_\"\"\"\n", + "\n", + " def __init__(self, embedding_dim, num_heads, feed_forward_dim, dropout_rate=0.1):\n", + " super().__init__()\n", + " self.num_heads_ = num_heads\n", + " self.layernorm1 = nn.LayerNorm(embedding_dim, eps=1e-6)\n", + " self.layernorm2 = nn.LayerNorm(embedding_dim, eps=1e-6)\n", + " 
self.layernorm3 = nn.LayerNorm(embedding_dim, eps=1e-6)\n", + " self.decoder_multi_attention = nn.MultiheadAttention(embedding_dim, num_heads)\n", + " self.encoder_multi_attention = nn.MultiheadAttention(embedding_dim, num_heads)\n", + " self.decoder_dropout = nn.Dropout(0.5)\n", + " self.encoder_dropout = nn.Dropout(dropout_rate)\n", + " self.ffn_dropout = nn.Dropout(dropout_rate)\n", + " self.ffn = nn.Sequential(\n", + " nn.Linear(embedding_dim, feed_forward_dim),\n", + " nn.ReLU(),\n", + " nn.Linear(feed_forward_dim, embedding_dim),\n", + " )\n", + "\n", + " def _causal_attention_mask(self, sequence_length, batch_size=1, device=None):\n", + " mask = torch.triu(torch.ones(sequence_length, sequence_length), diagonal=1).to(\n", + " device\n", + " )\n", + " mask = mask.unsqueeze(0).expand(\n", + " batch_size * self.num_heads_, sequence_length, sequence_length\n", + " )\n", + " return mask\n", + "\n", + " def forward(\n", + " self,\n", + " encoder_out,\n", + " src_target_,\n", + " ):\n", + " input_shape = src_target_.size()\n", + " batch_size = 1 # input_shape[0]\n", + " seq_len = input_shape[0]\n", + " x_device = src_target_.device\n", + "\n", + " # Mask\n", + " causal_mask = self._causal_attention_mask(\n", + " sequence_length=seq_len, batch_size=batch_size, device=x_device\n", + " )\n", + "\n", + " target_att, _ = self.decoder_multi_attention(\n", + " src_target_, src_target_, src_target_, attn_mask=causal_mask\n", + " )\n", + " target_norm_out = self.layernorm1(\n", + " src_target_ + self.decoder_dropout(target_att)\n", + " )\n", + "\n", + " encoder_out, _ = self.encoder_multi_attention(\n", + " target_norm_out, encoder_out, encoder_out\n", + " )\n", + " enc_out_norm = self.layernorm2(encoder_out + self.encoder_dropout(encoder_out))\n", + "\n", + " ffn_out = self.ffn(enc_out_norm)\n", + " ffn_out_norm = self.layernorm3(enc_out_norm + self.ffn_dropout(ffn_out))\n", + " return ffn_out_norm\n", + "\n", + "\n", + "class ASLTransformer(nn.Module):\n", + " def __init__(\n", + " self,\n", + " num_hidden_dim=64,\n", + " multi_num_head=8,\n", + " num_feed_forward=128,\n", + " target_maxlen=64,\n", + " num_layers_enc=4,\n", + " num_layers_dec=4,\n", + " ):\n", + " \"\"\"_summary_\n", + "\n", + " Parameters\n", + " ----------\n", + " num_hidden_dim : int, optional\n", + " _description_, by default 64\n", + " multi_num_head : int, optional\n", + " _description_, by default 8\n", + " num_feed_forward : int, optional\n", + " _description_, by default 128\n", + " target_maxlen : int, optional\n", + " _description_, by default 64\n", + " num_layers_enc : int, optional\n", + " _description_, by default 4\n", + " num_layers_dec : int, optional\n", + " _description_, by default 4\n", + " \"\"\"\n", + " super().__init__()\n", + " self.num_layers_enc = num_layers_enc\n", + " self.num_layers_dec = num_layers_dec\n", + " self.target_maxlen = target_maxlen\n", + " self.num_classes = 62\n", + "\n", + " self.encoder_input = LandmarkEmbedding(embedding_dim=num_hidden_dim)\n", + " self.decoder_input = TokenEmbedding(\n", + " num_vocab=self.num_classes,\n", + " embedding_dim=num_hidden_dim,\n", + " maxlen=target_maxlen,\n", + " )\n", + "\n", + " self.encoder = nn.Sequential(\n", + " self.encoder_input,\n", + " *[\n", + " TransformerEncoder(\n", + " embedding_dim=num_hidden_dim,\n", + " num_heads=multi_num_head,\n", + " feed_forward_dim=num_feed_forward,\n", + " )\n", + " for _ in range(num_layers_enc)\n", + " ],\n", + " )\n", + "\n", + " for i in range(num_layers_dec):\n", + " self.add_module(\n", + " 
f\"decoder_layer_{i}\",\n", + " TransformerDecoder(\n", + " embedding_dim=num_hidden_dim,\n", + " num_heads=multi_num_head,\n", + " feed_forward_dim=num_feed_forward,\n", + " ),\n", + " )\n", + "\n", + " self.classifier = nn.Linear(\n", + " in_features=num_hidden_dim, out_features=self.num_classes\n", + " )\n", + "\n", + " def _decoder_run(self, enc_out, target):\n", + " decoder_out = self.decoder_input(target)\n", + " for i in range(self.num_layers_dec):\n", + " decoder_out = getattr(self, f\"decoder_layer_{i}\")(enc_out, decoder_out)\n", + " return decoder_out\n", + "\n", + " def forward(self, source, target):\n", + " if len(source.shape) == 2: # Check if single input\n", + " source = source.unsqueeze(0) # Add batch dimension\n", + " if len(target.shape) == 1: # Check if single input\n", + " target = target.unsqueeze(0) # Add batch dimension\n", + "\n", + " encoder_out = self.encoder(source)\n", + " transformer_output = self._decoder_run(encoder_out, target)\n", + " return self.classifier(transformer_output)\n", + "\n", + " def generate(self, source, target_start_token_idx=60):\n", + " if len(source.shape) == 2: # Check if single input\n", + " source = source.unsqueeze(0) # Add batch dimension\n", + "\n", + " encoder_out = self.encoder(source)\n", + " decoder_input = (\n", + " torch.ones((source.shape[0], 1), dtype=torch.long)\n", + " .to(source.device)\n", + " * target_start_token_idx\n", + " )\n", + " dec_logits = []\n", + "\n", + " for _ in range(self.target_maxlen - 1):\n", + " decoder_out = self._decoder_run(encoder_out, decoder_input)\n", + " logits = self.classifier(decoder_out)\n", + "\n", + " logits = torch.argmax(logits, dim=-1, keepdim=True)\n", + " last_logit = logits[:, -1]\n", + " dec_logits.append(last_logit)\n", + " decoder_input = torch.cat([decoder_input, last_logit], dim=-1)\n", + "\n", + " return decoder_input.squeeze(0) if len(source.shape) == 2 else decoder_input" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2023-12-11 03:21:41.460\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m24\u001b[0m - \u001b[31m\u001b[1m ERROR Message ==> Expected 2D (unbatched) or 3D (batched) input to conv1d, but got input of size: [1, 1, 128, 345]\u001b[0m\n", + "\u001b[33m\u001b[1mTraceback (most recent call last):\u001b[0m\n", + "\n", + " File \"\", line 198, in _run_module_as_main\n", + " File \"\", line 88, in _run_code\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel_launcher.py\", line 17, in \n", + " app.launch_new_instance()\n", + " │ └ >\n", + " └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\kernelapp.py\", line 724, in start\n", + " self.io_loop.start()\n", + " │ │ └ \n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tornado\\platform\\asyncio.py\", line 215, in start\n", + " self.asyncio_loop.run_forever()\n", + " │ │ └ \n", + " │ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>\n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 607, in run_forever\n", + " self._run_once()\n", + " │ └ \n", + " └ <_WindowsSelectorEventLoop running=True closed=False debug=False>\n", + " File 
\"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 1922, in _run_once\n", + " handle._run()\n", + " │ └ \n", + " └ , ...],))>)>\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\events.py\", line 80, in _run\n", + " self._context.run(self._callback, *self._args)\n", + " │ │ │ │ │ └ \n", + " │ │ │ │ └ , ...],))>)>\n", + " │ │ │ └ \n", + " │ │ └ , ...],))>)>\n", + " │ └ \n", + " └ , ...],))>)>\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\kernelbase.py\", line 512, in dispatch_queue\n", + " await self.process_one()\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\kernelbase.py\", line 501, in process_one\n", + " await dispatch(*args)\n", + " │ └ ([, , >\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\kernelbase.py\", line 408, in dispatch_shell\n", + " await result\n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\kernelbase.py\", line 731, in execute_request\n", + " reply_content = await reply_content\n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\ipkernel.py\", line 417, in do_execute\n", + " res = shell.run_cell(\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\zmqshell.py\", line 540, in run_cell\n", + " return super().run_cell(*args, **kwargs)\n", + " │ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/c%3A/Main%20Workspace/Cohort8-Ransom-Kuti-Ladipo/l...\n", + " └ ('# Create a sample input\\nbatch_source_sequence = torch.randn(2, 128, 345) # Sample source sequence (batch_size, maxlen, nu...\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 2945, in run_cell\n", + " result = self._run_cell(\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 3000, in _run_cell\n", + " return runner(coro)\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\async_helpers.py\", line 129, in _pseudo_sync_runner\n", + " coro.send(None)\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 3203, in run_cell_async\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", + " │ │ │ │ └ 'C:\\\\Users\\\\Yinka\\\\AppData\\\\Local\\\\Temp\\\\ipykernel_3812\\\\4012752373.py'\n", + " │ │ │ └ [, , \n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 3382, in run_ast_nodes\n", + " if await self.run_code(code, result, async_=asy):\n", + " │ │ │ │ └ False\n", + " │ │ │ └ at 0x0000017533D041B0, file \"C:\\Users\\Yinka\\AppData\\Local\\Temp\\ipykernel_3812\\4012752373.py\", line 1>\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 3442, in 
run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " │ │ │ │ └ {'__name__': '__main__', '__doc__': '\\nBaseline Transformer Module\\n\\nThis module contains the implementation of a Transforme...\n", + " │ │ │ └ \n", + " │ │ └ \n", + " │ └ \n", + " └ at 0x0000017533D041B0, file \"C:\\Users\\Yinka\\AppData\\Local\\Temp\\ipykernel_3812\\4012752373.py\", line 1>\n", + "\n", + "> File \"\u001b[32mC:\\Users\\Yinka\\AppData\\Local\\Temp\\ipykernel_3812\\\u001b[0m\u001b[32m\u001b[1m4012752373.py\u001b[0m\", line \u001b[33m18\u001b[0m, in \u001b[35m\u001b[0m\n", + " \u001b[1mpredictions\u001b[0m \u001b[35m\u001b[1m=\u001b[0m \u001b[1mtransformer_model\u001b[0m\u001b[1m(\u001b[0m\u001b[1msingle_src_seq\u001b[0m\u001b[1m,\u001b[0m \u001b[1msingle_trg_seq\u001b[0m\u001b[1m)\u001b[0m\n", + " \u001b[36m │ │ └ \u001b[0m\u001b[36m\u001b[1mtensor([53, 3, 20, 29, 50, 25, 29, 13, 15, 32, 10, 15, 32, 36, 23, 50, 53, 5,\u001b[0m\n", + " \u001b[36m │ │ \u001b[0m\u001b[36m\u001b[1m 43, 37, 41, 58, 30, 12, 6, 40, 31, 4...\u001b[0m\n", + " \u001b[36m │ └ \u001b[0m\u001b[36m\u001b[1mtensor([[0.4061, 0.6164, 0.6337, ..., 0.5066, 0.4483, 0.4846],\u001b[0m\n", + " \u001b[36m │ \u001b[0m\u001b[36m\u001b[1m [0.0074, 0.1099, 0.6021, ..., 0.5536, 0.7102, 0.4944...\u001b[0m\n", + " \u001b[36m └ \u001b[0m\u001b[36m\u001b[1mASLTransformer(\u001b[0m\n", + " \u001b[36m \u001b[0m\u001b[36m\u001b[1m (encoder_input): LandmarkEmbedding(\u001b[0m\n", + " \u001b[36m \u001b[0m\u001b[36m\u001b[1m (conv1): Conv1d(1, 64, kernel_size=(11,), stride=(2,), padding=(5,)...\u001b[0m\n", + "\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1501, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " │ │ └ {}\n", + " │ └ (tensor([[0.4061, 0.6164, 0.6337, ..., 0.5066, 0.4483, 0.4846],\n", + " │ [0.0074, 0.1099, 0.6021, ..., 0.5536, 0.7102, 0.494...\n", + " └ \u001b[0m\n", + " \u001b[36m └ \u001b[0m\u001b[36m\u001b[1m\u001b[0m\n", + "\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1501, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " │ │ └ {}\n", + " │ └ (tensor([[[[0.4061, 0.6164, 0.6337, ..., 0.5066, 0.4483, 0.4846],\n", + " │ [0.0074, 0.1099, 0.6021, ..., 0.5536, 0.7102, 0...\n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\conv.py\", line 313, in forward\n", + " return self._conv_forward(input, self.weight, self.bias)\n", + " │ │ │ │ └ Conv1d(1, 64, kernel_size=(11,), stride=(2,), padding=(5,))\n", + " │ │ │ └ Conv1d(1, 64, kernel_size=(11,), stride=(2,), padding=(5,))\n", + " │ │ └ tensor([[[[0.4061, 0.6164, 0.6337, ..., 0.5066, 0.4483, 0.4846],\n", + " │ │ [0.0074, 0.1099, 0.6021, ..., 0.5536, 0.7102, 0....\n", + " │ └ \n", + " └ Conv1d(1, 64, kernel_size=(11,), stride=(2,), padding=(5,))\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\conv.py\", line 309, in _conv_forward\n", + " return F.conv1d(input, weight, bias, self.stride,\n", + " │ │ │ │ │ │ └ (2,)\n", + " │ │ │ │ │ └ Conv1d(1, 64, kernel_size=(11,), stride=(2,), padding=(5,))\n", + " │ │ │ │ └ Parameter containing:\n", + " │ │ │ │ tensor([-0.1697, -0.2340, -0.0869, 0.0877, -0.2328, 0.2527, -0.2036, 0.0177,\n", + " │ │ │ │ -0.2673, -0.069...\n", + " │ │ │ └ Parameter containing:\n", + " │ │ │ tensor([[[-0.1422, 
-0.0277, 0.0092, -0.0871, 0.2008, -0.2052, -0.2548,\n", + " │ │ │ 0.0125, 0.0531, 0...\n", + " │ │ └ tensor([[[[0.4061, 0.6164, 0.6337, ..., 0.5066, 0.4483, 0.4846],\n", + " │ │ [0.0074, 0.1099, 0.6021, ..., 0.5536, 0.7102, 0....\n", + " │ └ \n", + " └ {error}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([128, 346])\n" + ] + } + ], + "source": [ + "import torch\n", + "\n", + "# Your original sequence tensor\n", + "original_tensor = torch.randn(118, 346)\n", + "\n", + "# Define the desired output shape\n", + "desired_shape = (128, 346)\n", + "\n", + "# Calculate the padding on the first dimension from the bottom\n", + "padding_bottom = max(0, desired_shape[0] - original_tensor.size(0))\n", + "\n", + "# Pad the tensor along the first dimension from the bottom\n", + "padded_tensor = torch.nn.functional.pad(original_tensor, (0, 0, 0, padding_bottom))\n", + "\n", + "# Now, padded_tensor has the shape (128, 346)\n", + "print(padded_tensor.shape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 0.0490, 1.5782, -0.0793, -0.8889, -0.6999],\n", + " [ 0.3881, 1.1002, -0.7594, -1.0423, 1.1450],\n", + " [ 2.1911, 0.6852, 0.7096, -1.1343, -0.3205]])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "original_tensor[115:,:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 0.0490, 1.5782, -0.0793, -0.8889, -0.6999],\n", + " [ 0.3881, 1.1002, -0.7594, -1.0423, 1.1450],\n", + " [ 2.1911, 0.6852, 0.7096, -1.1343, -0.3205],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "padded_tensor[115:,:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c3a78ff9241405f0630be2ccd4e6fc6c2b27be89 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 06:09:07 +0100 Subject: [PATCH 12/16] [add] added frame preprocess test --- linguify_yb/src/tests/test_data_ingestion.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/linguify_yb/src/tests/test_data_ingestion.py b/linguify_yb/src/tests/test_data_ingestion.py index b4e86067..e28f43ab 100644 --- a/linguify_yb/src/tests/test_data_ingestion.py +++ b/linguify_yb/src/tests/test_data_ingestion.py @@ -1,8 +1,18 @@ 
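[Editor's note — the sketch below is an annotation, not part of the patch.] The hunk that follows swaps the placeholder test for a parametrized check of `clean_frames_process`. As a minimal illustration of the contract it asserts — assuming the same `src.dataset.preprocess` import path used in the test — the function is expected to map any `(num_frames, 345)` landmark tensor to a fixed `(128, 345)` tensor, truncating long clips and zero-padding short ones:

```python
# Editorial sketch only; mirrors the shape contract the new test asserts.
import torch

from src.dataset.preprocess import clean_frames_process  # import path as used in the test below

for num_frames in (10, 128, 750):          # shorter than, equal to, and longer than FRAME_LEN
    frames = torch.randn(num_frames, 345)  # 345 = left hand (63) + right hand (63) + pose (99) + lip (120)
    out = clean_frames_process(frames)
    assert out.shape == (128, 345)         # truncated or zero-padded to FRAME_LEN frames
```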
"doc" -import torch import pytest +import torch +from torch.utils.data import DataLoader +from src.dataset.frames_config import FRAME_LEN +from src.dataset.preprocess import clean_frames_process + -def test_data_columns(): - pass +@pytest.mark.parametrize( + "frames", + [torch.randn(num_frames, 345) for num_frames in [10, 108, 128, 156, 750, 420]], +) +def test_frames_preprocess(frames): + clean_frames = clean_frames_process(frames) + expected_output_shape = (128, 345) + assert expected_output_shape == clean_frames.shape From f92ba03f4a11920d619364c9ded239066d2f428c Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 06:09:19 +0100 Subject: [PATCH 13/16] [add] updates --- linguify_yb/src/tests/test_model.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/linguify_yb/src/tests/test_model.py b/linguify_yb/src/tests/test_model.py index c1206d1c..aca36478 100644 --- a/linguify_yb/src/tests/test_model.py +++ b/linguify_yb/src/tests/test_model.py @@ -7,6 +7,7 @@ from torch.utils.data import DataLoader from src.models.baseline_transformer import ASLTransformer + @pytest.fixture def baseline_model(): """_summary_""" @@ -24,7 +25,8 @@ def test_baseline_transformer_output_shape(baseline_model, inputs_x, target_y): expected_output_shape = (64, 62) assert output.shape == expected_output_shape -@pytest.mark.parametrize("inputs_x", [(torch.randn(128,345)), (torch.randn(128,345))]) + +@pytest.mark.parametrize("inputs_x", [(torch.randn(128, 345)), (torch.randn(128, 345))]) def test_baseline_transformer_generate_out( baseline_model, inputs_x, @@ -35,5 +37,21 @@ def test_baseline_transformer_generate_out( expected_output_len = 64 assert len(output) == expected_output_len -@pytest.mark.parametrize("input_shape", [(batch_size, input_dim) for batch_size in [1, 2, 5] for input_dim in [32, 64, 128]]) -def \ No newline at end of file + +@pytest.mark.parametrize( + "inputs_x, target_y, batch_size", + [ + ( + torch.randn(batch_size, 128, 345), + torch.randint(0, 60, (batch_size, 64)), + batch_size, + ) + for batch_size in [1, 2, 4, 8] + ], +) +def test_baseline_transformer_batch_shape( + baseline_model, inputs_x, target_y, batch_size +): + output = baseline_model(inputs_x, target_y) + expected_output_shape = (batch_size, 64, 62) + assert output.shape == expected_output_shape From 28101b8dfed25be41250a2275639542b3a87c699 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 06:10:01 +0100 Subject: [PATCH 14/16] [add] mics --- linguify_yb/src/dataset/frames_config.py | 94 +++++++++++++++--------- linguify_yb/src/dataset/preprocess.py | 91 +++++------------------ 2 files changed, 79 insertions(+), 106 deletions(-) diff --git a/linguify_yb/src/dataset/frames_config.py b/linguify_yb/src/dataset/frames_config.py index 98fb2d3b..a5b94ef1 100644 --- a/linguify_yb/src/dataset/frames_config.py +++ b/linguify_yb/src/dataset/frames_config.py @@ -1,43 +1,71 @@ """doc """ -FRAME_LEN = 128 -LPOSE = [13, 15, 17, 19, 21] -RPOSE = [14, 16, 18, 20, 22] -POSE = LPOSE + RPOSE +FRAME_LEN = 128 +LIP = [ + 61, + 185, + 40, + 39, + 37, + 0, + 267, + 269, + 270, + 409, + 291, + 146, + 91, + 181, + 84, + 17, + 314, + 405, + 321, + 375, + 78, + 191, + 80, + 81, + 82, + 13, + 312, + 311, + 310, + 415, + 95, + 88, + 178, + 87, + 14, + 317, + 402, + 318, + 324, + 308, +] -X = ( +FRAME = ["frame"] +N_LHAND = ( + [f"x_left_hand_{i}" for i in range(21)] + + [f"y_left_hand_{i}" for i in range(21)] + + [f"z_left_hand_{i}" for i in range(21)] +) +N_RHAND = ( [f"x_right_hand_{i}" for 
i in range(21)] - + [f"x_left_hand_{i}" for i in range(21)] - + [f"x_pose_{i}" for i in POSE] + + [f"y_right_hand_{i}" for i in range(21)] + + [f"z_right_hand_{i}" for i in range(21)] ) -Y = ( - [f"y_right_hand_{i}" for i in range(21)] - + [f"y_left_hand_{i}" for i in range(21)] - + [f"y_pose_{i}" for i in POSE] +N_POSE = ( + [f"x_pose_{i}" for i in range(33)] + + [f"y_pose_{i}" for i in range(33)] + + [f"z_pose_{i}" for i in range(33)] ) -Z = ( - [f"z_right_hand_{i}" for i in range(21)] - + [f"z_left_hand_{i}" for i in range(21)] - + [f"z_pose_{i}" for i in POSE] +N_FACE = ( + [f"x_face_{i}" for i in LIP] + + [f"y_face_{i}" for i in LIP] + + [f"z_face_{i}" for i in LIP] ) -FEATURE_COLUMNS = X + Y + Z - -X_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if "x_" in col] -Y_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if "y_" in col] -Z_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if "z_" in col] - -RHAND_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if "right" in col] -LHAND_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if "left" in col] -RPOSE_IDX = [ - i - for i, col in enumerate(FEATURE_COLUMNS) - if "pose" in col and int(col[-2:]) in RPOSE -] -LPOSE_IDX = [ - i - for i, col in enumerate(FEATURE_COLUMNS) - if "pose" in col and int(col[-2:]) in LPOSE -] +FEATURE_COLUMNS = FRAME + N_LHAND + N_RHAND + N_POSE + N_FACE diff --git a/linguify_yb/src/dataset/preprocess.py b/linguify_yb/src/dataset/preprocess.py index 9339f071..21a67e64 100644 --- a/linguify_yb/src/dataset/preprocess.py +++ b/linguify_yb/src/dataset/preprocess.py @@ -3,83 +3,28 @@ import torch from torch.nn import functional as F -from linguify_yb.src.dataset.frames_config import (FRAME_LEN, LHAND_IDX, - LPOSE_IDX, RHAND_IDX, - RPOSE_IDX) +# from dataset.frames_config import FRAME_LEN # TODO Clean up code, add comments and docs # TODO remove print and debug statements -# Preprocess frame - -def resize_pad(x): - if x.shape[0] < FRAME_LEN: - x = F.pad(x, (0, 0, 0, FRAME_LEN - x.shape[0], 0, 0)) - else: - x = x.unsqueeze(0) # Add batch and channel dimensions - x = torch.nn.functional.interpolate( - x, size=(FRAME_LEN, x.shape[1]), mode="bilinear", align_corners=False - ).squeeze(0) - - return x - - -def frames_preprocess(x): - x = torch.tensor(x) - rhand = x[:, RHAND_IDX] - lhand = x[:, LHAND_IDX] - rpose = x[:, RPOSE_IDX] - lpose = x[:, LPOSE_IDX] - - rnan_idx = torch.any(torch.isnan(rhand), dim=1) - lnan_idx = torch.any(torch.isnan(lhand), dim=1) - - rnans = torch.sum(rnan_idx) - lnans = torch.sum(lnan_idx) - - if rnans > lnans: - hand = lhand - pose = lpose - - hand_x = hand[:, 0 * (len(LHAND_IDX) // 3) : 1 * (len(LHAND_IDX) // 3)] - hand_y = hand[:, 1 * (len(LHAND_IDX) // 3) : 2 * (len(LHAND_IDX) // 3)] - hand_z = hand[:, 2 * (len(LHAND_IDX) // 3) : 3 * (len(LHAND_IDX) // 3)] - hand = torch.cat([1 - hand_x, hand_y, hand_z], dim=1) - - pose_x = pose[:, 0 * (len(LPOSE_IDX) // 3) : 1 * (len(LPOSE_IDX) // 3)] - pose_y = pose[:, 1 * (len(LPOSE_IDX) // 3) : 2 * (len(LPOSE_IDX) // 3)] - pose_z = pose[:, 2 * (len(LPOSE_IDX) // 3) : 3 * (len(LPOSE_IDX) // 3)] - pose = torch.cat([1 - pose_x, pose_y, pose_z], dim=1) - else: - hand = rhand - pose = rpose - - hand_x = hand[:, 0 * (len(LHAND_IDX) // 3) : 1 * (len(LHAND_IDX) // 3)] - hand_y = hand[:, 1 * (len(LHAND_IDX) // 3) : 2 * (len(LHAND_IDX) // 3)] - hand_z = hand[:, 2 * (len(LHAND_IDX) // 3) : 3 * (len(LHAND_IDX) // 3)] - hand = torch.cat( - [hand_x.unsqueeze(-1), hand_y.unsqueeze(-1), hand_z.unsqueeze(-1)], dim=-1 - ) - - mean = torch.mean(hand, dim=1).unsqueeze(1) 
- std = torch.std(hand, dim=1).unsqueeze(1) - hand = (hand - mean) / std - - pose_x = pose[:, 0 * (len(LPOSE_IDX) // 3) : 1 * (len(LPOSE_IDX) // 3)] - pose_y = pose[:, 1 * (len(LPOSE_IDX) // 3) : 2 * (len(LPOSE_IDX) // 3)] - pose_z = pose[:, 2 * (len(LPOSE_IDX) // 3) : 3 * (len(LPOSE_IDX) // 3)] - pose = torch.cat( - [pose_x.unsqueeze(-1), pose_y.unsqueeze(-1), pose_z.unsqueeze(-1)], dim=-1 - ) - - x = torch.cat([hand, pose], dim=1) - #print(f"befor re{x.shape}") - x = resize_pad(x) - #print(f"after re{x.shape}") +def clean_frames_process( + x, max_frame_len=128, n_hand_landmarks=21, n_pose_landmarks=33, n_face_landmarks=40 +): + x = x[:max_frame_len] x = torch.where(torch.isnan(x), torch.zeros_like(x), x) - #print(x.shape) - - #! CRITICAL Debug - # x = x.view(FRAME_LEN, len(LHAND_IDX) + len(LPOSE_IDX)) + n_frames = x.size(0) + lhand = x[:, 0:63].view(n_frames, 3, n_hand_landmarks).transpose(1, 2) + rhand = x[:, 63:126].view(n_frames, 3, n_hand_landmarks).transpose(1, 2) + pose = x[:, 126:225].view(n_frames, 3, n_pose_landmarks).transpose(1, 2) + face = x[:, 225:345].view(n_frames, 3, n_face_landmarks).transpose(1, 2) + + x = torch.cat([lhand, rhand, pose, face], axis=1) + x = x.view(n_frames, 345) + if n_frames < max_frame_len: + # Calculate the padding on the first dimension from the bottom + padding_bottom = max(0, max_frame_len - x.size(0)) + # Pad the tensor along the first dimension from the bottom + x = F.pad(x, (0, 0, 0, padding_bottom)) return x From 5383afe3e9b8677fde9a82f0b4a4ab57e065a9cb Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Tue, 12 Dec 2023 17:28:56 +0100 Subject: [PATCH 15/16] updates --- linguify_yb/development/code_dev.ipynb | 44 +++++- linguify_yb/development/dev.ipynb | 3 +- linguify_yb/src/benchmark.py | 7 + linguify_yb/src/config.py | 6 + linguify_yb/src/dataset/dataset_loader.py | 119 +++++++++++++++ linguify_yb/src/dataset/frames_config.py | 1 + linguify_yb/src/dataset/preprocess.py | 2 - linguify_yb/src/dev_data.py | 123 +++++++++++++++ linguify_yb/src/main.py | 101 +++++++++++++ .../src/models/baseline_transformer.py | 20 +-- linguify_yb/src/models/model_loader.py | 31 +--- linguify_yb/src/models/static_transfromer.py | 0 linguify_yb/src/tests/test_data_ingestion.py | 16 ++ linguify_yb/src/trainer.py | 141 ++++++++++++++++++ linguify_yb/src/utils/util.py | 77 ++++++++++ version.txt | 0 16 files changed, 650 insertions(+), 41 deletions(-) create mode 100644 linguify_yb/src/config.py create mode 100644 linguify_yb/src/dataset/dataset_loader.py create mode 100644 linguify_yb/src/dev_data.py create mode 100644 linguify_yb/src/main.py create mode 100644 linguify_yb/src/models/static_transfromer.py create mode 100644 linguify_yb/src/trainer.py create mode 100644 linguify_yb/src/utils/util.py create mode 100644 version.txt diff --git a/linguify_yb/development/code_dev.ipynb b/linguify_yb/development/code_dev.ipynb index a41f02c2..251ebeaf 100644 --- a/linguify_yb/development/code_dev.ipynb +++ b/linguify_yb/development/code_dev.ipynb @@ -129,12 +129,54 @@ "]\n" ] }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(63, 63)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(RHAND_IDX), len(LHAND_IDX)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def read_file(file, file_id, landmarks_metadata_path):\n", + " phrase_list = []\n", + " frames_list = []\n", + 
" metadata_train_dataframe = pd.read_csv(landmarks_metadata_path)\n", + " file_id_df = metadata_train_dataframe.loc[\n", + " metadata_train_dataframe[\"file_id\"] == file_id\n", + " ]\n", + " saved_parueat_df = pq.read_table(\n", + " file, columns=[\"sequence_id\"] + FEATURE_COLUMNS\n", + " ).to_pandas()\n", + " for seq_id, phrase in zip(file_id_df.sequence_id, file_id_df.phrase):\n", + " frames = saved_parueat_df[saved_parueat_df.index == seq_id].to_numpy()\n", + " # NaN\n", + " right_num_nan = np.sum(np.sum(np.isnan(frames[:, RHAND_IDX]), axis=1) == 0)\n", + " left_num_nan = np.sum(np.sum(np.isnan(frames[:, LHAND_IDX]), axis=1) == 0)\n", + " \n", + " total_num_nan = max(right_num_nan, left_num_nan)\n", + " if 2 * len(phrase) < total_num_nan:\n", + " frames_list.append(frames)\n", + " phrase_list.append(phrase)\n", + " return (frames_list, phrase_list)\n" + ] } ], "metadata": { diff --git a/linguify_yb/development/dev.ipynb b/linguify_yb/development/dev.ipynb index 9224fe47..07faeb77 100644 --- a/linguify_yb/development/dev.ipynb +++ b/linguify_yb/development/dev.ipynb @@ -42,8 +42,7 @@ "metadata": {}, "outputs": [], "source": [ - "ans = [(torch.randn(batch_size, 128, 345), torch.randint(0, 60, (batch_size, 64)), batch_size) \n", - " for batch_size in [1,2,4,8]]" + "torch.is_distri" ] }, { diff --git a/linguify_yb/src/benchmark.py b/linguify_yb/src/benchmark.py index 4c710533..9b83888a 100644 --- a/linguify_yb/src/benchmark.py +++ b/linguify_yb/src/benchmark.py @@ -11,10 +11,15 @@ class BenchMarker: + """_summary_""" + def __init__(self) -> None: pass def get_model_macs(self, model, inputs=None) -> int: + """ + calculate the MACS of a model + """ return profile_macs(model, inputs) def get_model_sparsity(self, model: nn.Module) -> float: @@ -56,3 +61,5 @@ def runner(self, model): model_sparsity = self.get_model_sparsity(model) model_num_params = self.get_num_parameters(model) model_size = self.get_model_size(model) + + return diff --git a/linguify_yb/src/config.py b/linguify_yb/src/config.py new file mode 100644 index 00000000..98a1d336 --- /dev/null +++ b/linguify_yb/src/config.py @@ -0,0 +1,6 @@ +"""doc +""" +from pydantic import BaseModel + +class Data(BaseModel): + \ No newline at end of file diff --git a/linguify_yb/src/dataset/dataset_loader.py b/linguify_yb/src/dataset/dataset_loader.py new file mode 100644 index 00000000..947531f8 --- /dev/null +++ b/linguify_yb/src/dataset/dataset_loader.py @@ -0,0 +1,119 @@ +"""doc +""" + +import json + +import numpy as np +import pandas as pd +import pyarrow.parquet as pq +import torch +from torch.nn import functional as F +from torch.utils.data import DataLoader, Dataset + +from dataset.frames_config import FEATURE_COLUMNS, FRAME_LEN, LHAND_IDX, RHAND_IDX +from dataset.preprocess import clean_frames_process + +PHRASE_PATH = "/kaggle/input/asl-fingerspelling/character_to_prediction_index.json" +METADATA = "/kaggle/input/asl-fingerspelling/train.csv" + +with open(PHRASE_PATH, "r", encoding="utf-8") as f: + character_to_num = json.load(f) + +PAD_TOKEN = "P" +START_TOKEN = "<" +END_TOKEN = ">" +PAD_TOKEN_IDX = 59 +START_TOKEN_IDX = 60 +END_TOKEN_IDX = 61 + +character_to_num[PAD_TOKEN] = PAD_TOKEN_IDX +character_to_num[START_TOKEN] = START_TOKEN_IDX +character_to_num[END_TOKEN] = END_TOKEN_IDX +num_to_character = {j: i for i, j in character_to_num.items()} + + +class TokenHashTable: + def __init__( + self, word2index_mapping=character_to_num, index2word_mapping=num_to_character + ): + self.word2index = word2index_mapping + self.index2word = 
index2word_mapping + + def _indexesfromsentence(self, sentence): + return [self.word2index[word] for word in sentence] + + def tensorfromsentence(self, sentence): + indexes = self._indexesfromsentence(sentence) + return torch.tensor(indexes, dtype=torch.long) # .view(1, -1) + + def indexes_to_sentence(self, indexes_list): + if torch.is_tensor(indexes_list): + indexes_list = indexes_list.tolist() + words = [self.index2word[idx] for idx in indexes_list] + return words + + +def read_file(file, file_id, landmarks_metadata_path): + phrase_list = [] + frames_list = [] + metadata_train_dataframe = pd.read_csv(landmarks_metadata_path) + file_id_df = metadata_train_dataframe.loc[ + metadata_train_dataframe["file_id"] == file_id + ] + saved_parueat_df = pq.read_table( + file, columns=["sequence_id"] + FEATURE_COLUMNS + ).to_pandas() + for seq_id, phrase in zip(file_id_df.sequence_id, file_id_df.phrase): + frames = saved_parueat_df[saved_parueat_df.index == seq_id].to_numpy() + # NaN + frames_list.append(torch.tensor(frames)) + phrase_list.append(phrase) + return (frames_list, phrase_list) + + +class LandmarkDataset(Dataset): + def __init__(self, file_path, file_id, table, transform=True): + self.landmarks_metadata_path = METADATA + self.frames, self.labels = read_file( + file_path, file_id, self.landmarks_metadata_path + ) + self.trans = transform + self.table = table + + def _label_pre(self, label_sample): + sample = START_TOKEN + label_sample + END_TOKEN + new_phrase = self.table.tensorfromsentence(list(sample)) + ans = F.pad( + input=new_phrase, + pad=[0, 64 - new_phrase.shape[0]], + mode="constant", + value=PAD_TOKEN_IDX, + ) + return ans + + def __len__(self): + return len(self.labels) + + def __getitem__(self, idx): + if torch.is_tensor(idx): + idx = idx.tolist() + phrase = self.labels[idx] + frames = self.frames[idx] + + if self.trans: + phrase = self._label_pre(phrase) + frames = clean_frames_process(frames) + return frames, phrase + + +def get_dataloader(file_path, file_id, batch_size=32, num_workers_=1): + lookup_table = TokenHashTable(character_to_num, num_to_character) + dataset = LandmarkDataset(file_path, file_id, lookup_table, transform=True) + + dataloader = DataLoader( + dataset, + batch_size=batch_size, + num_workers=num_workers_, + pin_memory=True, + ) + return dataloader diff --git a/linguify_yb/src/dataset/frames_config.py b/linguify_yb/src/dataset/frames_config.py index a5b94ef1..2f277968 100644 --- a/linguify_yb/src/dataset/frames_config.py +++ b/linguify_yb/src/dataset/frames_config.py @@ -52,6 +52,7 @@ + [f"y_left_hand_{i}" for i in range(21)] + [f"z_left_hand_{i}" for i in range(21)] ) + N_RHAND = ( [f"x_right_hand_{i}" for i in range(21)] + [f"y_right_hand_{i}" for i in range(21)] diff --git a/linguify_yb/src/dataset/preprocess.py b/linguify_yb/src/dataset/preprocess.py index 21a67e64..192dd73a 100644 --- a/linguify_yb/src/dataset/preprocess.py +++ b/linguify_yb/src/dataset/preprocess.py @@ -3,8 +3,6 @@ import torch from torch.nn import functional as F -# from dataset.frames_config import FRAME_LEN - # TODO Clean up code, add comments and docs # TODO remove print and debug statements diff --git a/linguify_yb/src/dev_data.py b/linguify_yb/src/dev_data.py new file mode 100644 index 00000000..a861d8b0 --- /dev/null +++ b/linguify_yb/src/dev_data.py @@ -0,0 +1,123 @@ +"""Dataset Download Module + +This module provides functions to download the a subsample of Google ASL dataset. 
+ +Functions: +- download_dataset(url: str, destination: str, path): + Downloads a dataset from the given URL to the specified destination directory. +- main - the main function to run the script +""" + + +import os +import shutil +import subprocess +import zipfile + +from utils.logger_util import logger + +DATA_DIR = "data/asl-fingerspelling/" +data_files = ["train.csv", "character_to_prediction_index.json"] +train_landmarks = ["1019715464.parquet", "1021040628.parquet", "105143404.parquet"] +TRAIN_LANDMARKS_DIR = "train_landmarks/" + +COMMAND = [ + "kaggle", + "competitions", + "download", + "-c", + "asl-fingerspelling", + "-f", + "FILE", + "-p", + "data/raw/", +] + + +def check_storage(project_dir=os.getcwd()): + """check and return availabe storage space + + Parameters + ---------- + directory_path : str, Path + current working directory/directory path + + Returns + ------- + int + the size of available storage space (GB) + + Raises + ------ + StorageFullError + exception for when storage is full. + """ + total, used, free = shutil.disk_usage(project_dir) + total_size_gb = round(total / (2**30), 2) + used_size_gb = round(used / (2**30), 2) + free_size_gb = round(free / (2**30), 2) + if used_size_gb / total_size_gb >= 0.8: + raise StorageFullError + return free_size_gb + + +class StorageFullError(Exception): + """Custom exception for when storage is full.""" + + pass + + +def downlaod_file(cmd, unzipped_file_path, data_dir): + """Download file using kaggle API + + Parameters + ---------- + cmd : list + Kaggle API Commands + unzipped_file : str, Path + path of the unzipped file + data_dir : str, Path + the directory where the data should be downloaded into + """ + subprocess.run(cmd, check=True, text=True) + if ( + os.path.exists(unzipped_file_path) + and os.path.splitext(unzipped_file_path)[1].lower() == ".zip" + ): + # Unzipping and delete the zipped file to free storage + with zipfile.ZipFile(unzipped_file_path, "r") as zip_ref: + zip_ref.extractall(data_dir) + os.remove(unzipped_file_path) + else: + pass + + +def main(): + """the main function to run the script""" + logger.info("Commencing downloading the dataset") + try: + logger.info(f"Current Available space {check_storage()}GB") + for file in data_files: + logger.info(f"Downloading{file} in {DATA_DIR}") + COMMAND[6] = file + unzipfile_path = DATA_DIR + file + ".zip" + downlaod_file(COMMAND, unzipfile_path, DATA_DIR) + logger.info(f" {file} downloaded succesful") + # Downloading the LANDMARKS files + for parquet_file in train_landmarks: + logger.info(f"Current Available space {check_storage()}GB") + file_path = TRAIN_LANDMARKS_DIR + parquet_file + COMMAND[6] = file_path + COMMAND[8] = DATA_DIR + TRAIN_LANDMARKS_DIR + unzipfile_path = DATA_DIR + file_path + ".zip" + downlaod_file(COMMAND, unzipfile_path, DATA_DIR + TRAIN_LANDMARKS_DIR) + logger.info(f"{parquet_file} downloaded succesfully") + + logger.success("All files downloaded succesfully") + + except Exception as error: + logger.exception(f"Data unloading was unsuccesfully due to {error}") + + +if __name__ == "__main__": + main() diff --git a/linguify_yb/src/main.py b/linguify_yb/src/main.py new file mode 100644 index 00000000..ac12cfc8 --- /dev/null +++ b/linguify_yb/src/main.py @@ -0,0 +1,101 @@ +""" +doc + +# Usage: +# python -m src/train.py \ +# --epochs 10 \ +# --batch 512 \ +""" +# TODO Complete and refactor code for distributed training + +import os +import json + +import numpy as np +import torch +import wandb +from torch import nn + +from utils.util import 
get_device_strategy, parse_args, set_seed
+from utils.logger_util import logger
+from models.model_loader import ModelLoader
+from dataset.dataset_loader import get_dataloader
+import trainer
+
+try:
+    dataset_paths = "data/dev_samples.json"  # On Kaggle replace with "data/dataset_paths.json" to train on the full data
+    with open(dataset_paths, "r", encoding="utf-8") as json_file:
+        data_dict = json.load(json_file)
+    LANDMARK_DIR = "/kaggle/input/asl-fingerspelling/train_landmarks"
+    MODEL_DIR = "model.pt"
+
+    # Training dataset
+    train_dataset = data_dict["train_files"]
+    train_file_ids = [os.path.basename(file) for file in train_dataset]
+    train_file_ids = [
+        int(file_name.replace(".parquet", "")) for file_name in train_file_ids
+    ]
+    assert len(train_dataset) == len(
+        train_file_ids
+    ), "Failed import of train files paths"
+    TRAIN_DS_FILES = list(zip(train_dataset, train_file_ids))
+
+    # Validation dataset
+    valid_dataset = data_dict["valid_files"]
+    valid_file_ids = [os.path.basename(file) for file in valid_dataset]
+    valid_file_ids = [
+        int(file_name.replace(".parquet", "")) for file_name in valid_file_ids
+    ]
+    assert len(valid_dataset) == len(
+        valid_file_ids
+    ), "Failed import of valid files paths"
+    VALID_DS_FILES = list(zip(valid_dataset, valid_file_ids))
+except AssertionError as assert_error:
+    logger.exception(f"Loading the dataset file lists failed: {assert_error}")
+
+
+def main(arg):
+    logger.info(f"Starting training on {arg.model}")
+    # To ensure reproducibility of the training process
+    set_seed()
+    DEVICE = get_device_strategy(tpu=arg.tpu)
+    logger.info(f"Training on {DEVICE} for {arg.epochs} epochs.")
+
+    model = ModelLoader().get_model(arg.model)
+
+    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
+    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
+
+    # Optimizes the given model/function using TorchDynamo and the specified backend
+    model = torch.compile(model)
+
+    logger.info("Starting the training loop")
+    wandb.init(
+        project="ASL-project",
+        config={
+            "learning_rate": 0.01,
+            "architecture": "Test Model",
+            "dataset": "Google ASL Landmarks",
+            "epochs": arg.epochs,
+        },
+    )
+
+    wandb.watch(model)
+    try:
+        trainer.train(
+            model=model,
+            optim=optimizer,
+            loss_func=criterion,
+            n_epochs=arg.epochs,
+            batch=arg.batch,
+            device=DEVICE,
+        )
+        logger.success(f"Training completed: {arg.epochs} epochs on {DEVICE}.")
+
+    except Exception as error:
+        logger.exception(f"Training failed due to: {error}.")
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/linguify_yb/src/models/baseline_transformer.py b/linguify_yb/src/models/baseline_transformer.py
index 2ab43725..5fc151e2 100644
--- a/linguify_yb/src/models/baseline_transformer.py
+++ b/linguify_yb/src/models/baseline_transformer.py
@@ -18,19 +18,19 @@
 
 
 class TokenEmbedding(nn.Module):
-    """_summary_"""
+    """Embed the tokens with positional encoding."""
 
     def __init__(self, num_vocab, maxlen, embedding_dim):
         """_summary_
 
         Parameters
         ----------
-        num_vocab : _type_
-            _description_
-        maxlen : _type_
-            _description_
-        embedding_dim : _type_
-            _description_
+        num_vocab : int
+            size of the vocabulary
+        maxlen : int
+            maximum sequence length
+        embedding_dim : int
+            embedding output dimension
         """
         super().__init__()
         self.token_embed_layer = nn.Embedding(num_vocab, embedding_dim)
@@ -41,12 +41,12 @@ def forward(self, x):
 
         Parameters
         ----------
-        x : _type_
+        x : torch.Tensor
             _description_
 
         Returns
         -------
-        _type_
+        torch.Tensor
             _description_
         """
         maxlen = x.size(-1)
@@ -79,7 +79,7 @@ def __init__(self, embedding_dim):
         self.embedding_layer = nn.Linear(256,
embedding_dim) def forward(self, x): - # Input x should have shape (batch_size, input_size) + # Input x should have shape (batch_size, input_size, input_dim) x = x.unsqueeze(1) # Add a channel dimension for 1D convolution # Apply convolutional layers with ReLU activation and stride 2 diff --git a/linguify_yb/src/models/model_loader.py b/linguify_yb/src/models/model_loader.py index b09b7710..aabc97b7 100644 --- a/linguify_yb/src/models/model_loader.py +++ b/linguify_yb/src/models/model_loader.py @@ -1,16 +1,16 @@ """doc """ -import torch.nn as nn -from linguify_yb.src.models import baseline_transfomer, test_model + +from models.baseline_transformer import ASLTransformer + class ModelLoader: """Model Loader""" def __init__(self): self.models = { - "asl_transfomer": baseline_transfomer.build_model(), - "test_model": test_model.build_model(), + "asl_transfomer": ASLTransformer(), } def get_model(self, model_name): @@ -30,25 +30,4 @@ def get_model(self, model_name): if model_name in self.models: return self.models[model_name] else: - raise ValueError - - - -# For Debugging -class TestLinear(nn.Module): - def __init__( - self, - ) -> None: - super().__init__() - self.linear1 = nn.Linear(10, 100) - self.linear2 = nn.Linear(100, 10) - self.linear3 = nn.Linear(10, 2) - self.sequnn = nn.Sequential(self.linear1, self.linear2, self.linear3) - - def forward(self, input_x): - outs = self.linear1(input_x) - return outs - - -def build_model(): - return TestLinear() + raise ValueError("Model is not in the model list") diff --git a/linguify_yb/src/models/static_transfromer.py b/linguify_yb/src/models/static_transfromer.py new file mode 100644 index 00000000..e69de29b diff --git a/linguify_yb/src/tests/test_data_ingestion.py b/linguify_yb/src/tests/test_data_ingestion.py index e28f43ab..1a51fd08 100644 --- a/linguify_yb/src/tests/test_data_ingestion.py +++ b/linguify_yb/src/tests/test_data_ingestion.py @@ -7,6 +7,11 @@ from src.dataset.frames_config import FRAME_LEN from src.dataset.preprocess import clean_frames_process +# TODO test for frames in right shapes, in tensor, frames are normalize +# TODO test for frames dont contain NAN + +# TODO test for labels are tokensize + @pytest.mark.parametrize( "frames", @@ -16,3 +21,14 @@ def test_frames_preprocess(frames): clean_frames = clean_frames_process(frames) expected_output_shape = (128, 345) assert expected_output_shape == clean_frames.shape + +@pytest +def test_TokenHashTable(tokentable): + token_table = + sample_sentences = "" + sample_sentences_len = len(sample_sentences) + sample_sentences_token = [64,] + tokenize_result = token_table + assert sample_sentences_len == len(tokenize_result) + assert sample_sentences_token == tokenize_result + diff --git a/linguify_yb/src/trainer.py b/linguify_yb/src/trainer.py new file mode 100644 index 00000000..c9f191c3 --- /dev/null +++ b/linguify_yb/src/trainer.py @@ -0,0 +1,141 @@ +""" +doc + +# Usage: +# python -m src/train.py \ +# --epochs 10 \ +# --batch 512 \ +""" +# TODO Complete and refactor code for distributed training + +import os +import json + +import numpy as np +import torch +import wandb +from torch import nn + +from utils.logger_util import logger + + +def train(model, optim, loss_func, n_epochs, batch, device,): + + model.to(device) + + train_losses = [] + val_losses = [] + val_dataloader = # get_dataloader(TRAIN_FILES[0][0], TRAIN_FILES[0][1], batch_size=batch) + for epoch in range(n_epochs): + logger.info(f"Training on epoch {epoch}.") + total_epochs = epoch + file_train_loss = [] + for file, file_id 
in TRAIN_DS_FILES: + train_dataloader = # get_dataloader(file, file_id, batch_size=batch) + + # Performs training using mini-batches + train_loss = mini_batch( + model, train_dataloader, optim, loss_func, device, validation=False + ) + file_train_loss.append(train_loss) + train_loss = np.mean(file_train_loss) + train_losses.append(train_loss) + + # Performs evaluation using mini-batches + logger.info("Starting validation.") + with torch.no_grad(): + val_loss = mini_batch( + model, val_dataloader, optim, loss_func, device, validation=True + ) + val_losses.append(val_loss) + + wandb.log( + { + "train_loss": train_loss, + "val_loss": val_loss, + "epoch": epoch, + } + ) + + if epoch // 2 == 0: + logger.info("Initiating checkpoint. Saving model and optimizer states.") + save_checkpoint( + MODEL_DIR, model, optim, total_epochs, train_losses, val_losses + ) + + +def mini_batch( + model, dataloader, mini_batch_optim, loss_func, device, validation=False +): + # The mini-batch can be used with both loaders + # The argument `validation`defines which loader and + # corresponding step function is going to be used + if validation: + step_func = val_step_func(model, loss_func) + else: + step_func = train_step_func(model, mini_batch_optim, loss_func) + + # Once the data loader and step function, this is the same + # mini-batch loop we had before + mini_batch_losses = [] + for x_batch, y_batch in dataloader: + x_batch = x_batch.to(device) + y_batch = y_batch.to(device) + loss = step_func(x=x_batch, y=y_batch) + mini_batch_losses.append(loss) + loss = np.mean(mini_batch_losses) + return loss + + +def train_step_func(model, optim_, loss_func): + def perform_train_step_fn(x, y): + model.train() + preds = model(x) + loss = loss_func(preds, y) + loss.backward() + optim_.step() + optim_.zero_grad() + return loss.item() + + return perform_train_step_fn + + +def val_step_func(model, loss_func): + def perform_val_step_fn(x, y): + model.eval() + preds = model(x) + loss = loss_func(preds, y) + return loss.item() + + return perform_val_step_fn + + +def save_checkpoint(filename, model, optimizer, total_epochs, train_losses, val_losses): + # Builds dictionary with all elements for resuming training + checkpoint = { + "epoch": total_epochs, + "model_state_dict": model.state_dict(), + "optimizer_state_dict": optimizer.state_dict(), + "loss": train_losses, + "val_loss": val_losses, + } + + torch.save(checkpoint, filename) + + +def load_checkpoint(model, optimizer, filename): + # Loads dictionary + checkpoint = torch.load(filename) + + # Restore state for model and optimizer + model.load_state_dict(checkpoint["model_state_dict"]) + optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) + + total_epochs = checkpoint["epoch"] + losses = checkpoint["loss"] + val_losses = checkpoint["val_loss"] + return model + + +def distributed_stra_gpu(): + pass diff --git a/linguify_yb/src/utils/util.py b/linguify_yb/src/utils/util.py new file mode 100644 index 00000000..c4371b7b --- /dev/null +++ b/linguify_yb/src/utils/util.py @@ -0,0 +1,77 @@ +import os +import random +import argparse + +import numpy as np +import torch + +#import torch_xla.core.xla_model as xm + +def set_seed(seed: int = 42) -> None: + np.random.seed(seed) + random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + # When running on the CuDNN backend, two further options must be set + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + # Set a fixed value for the hash seed + os.environ["PYTHONHASHSEED"] = 
str(seed) + + +def get_device_strategy(tpu=False): + if tpu: + device = None #xm.xla_device() + else: + device = torch.device("cuda" if torch.cuda.is_availabe() else "cpu") + return device + + +def parse_args(): + """ + Parse arguments given to the script. + + Returns: + The parsed argument object. + """ + parser = argparse.ArgumentParser( + description="Run distributed data-parallel training and log with wandb." + ) + + parser.add_argument( + "--model", + default="asl_transfomer", + type=str, + metavar="N", + help="name of model to train", + ) + + parser.add_argument( + "--epochs", + default=2, + type=int, + metavar="N", + help="number of total epochs to run", + ) + parser.add_argument( + "--batch", + default=32, + type=int, + metavar="N", + help="number of data samples in one batch", + ) + parser.add_argument( + "--tpu", + default=False, + type=bool, + metavar="N", + help="Train on TPU Device", + ) + parser.add_argument( + "--resume_checkpoint", + type=bool, + help="Path to the checkpoint for resuming training", + ) + + args = parser.parse_args() + return args diff --git a/version.txt b/version.txt new file mode 100644 index 00000000..e69de29b From 0b5653758e506b61458f690be782aa636a598e4c Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Tue, 12 Dec 2023 23:37:08 +0100 Subject: [PATCH 16/16] [add] updates --- linguify_yb/development/dev.ipynb | 54 ++++++++++++++------ linguify_yb/src/dataset/dataset_loader.py | 6 +-- linguify_yb/src/tests/test_data_ingestion.py | 37 +++++++++----- linguify_yb/src/trainer.py | 4 -- 4 files changed, 66 insertions(+), 35 deletions(-) diff --git a/linguify_yb/development/dev.ipynb b/linguify_yb/development/dev.ipynb index 07faeb77..27c00cb4 100644 --- a/linguify_yb/development/dev.ipynb +++ b/linguify_yb/development/dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -38,33 +38,57 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "torch.is_distri" + "sample_sentence_token = [60,51,39,40,50,0,40,50,0,32,0,51,36,50,51,0,49,52,45,61]\n", + "# Padding the token\n", + "sample_sentence_token = sample_sentence_token + ([59] * (64 - len(sample_sentence_token)))\n", + "sample_sentence_token = torch.tensor(sample_sentence_token)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 20, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "torch.Size([1, 128, 345]) torch.Size([1, 64]) 1\n", - "torch.Size([2, 128, 345]) torch.Size([2, 64]) 2\n", - "torch.Size([4, 128, 345]) torch.Size([4, 64]) 4\n", - "torch.Size([8, 128, 345]) torch.Size([8, 64]) 8\n" - ] + "data": { + "text/plain": [ + "64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(sample_sentence_token)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([60, 51, 39, 40, 50, 0, 40, 50, 0, 32, 0, 51, 36, 50, 51, 0, 49, 52,\n", + " 45, 61, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,\n", + " 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,\n", + " 59, 59, 59, 59, 59, 59, 59, 59, 59, 59])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "for b in ans:\n", - " x, y , bs = b\n", - " print(x.shape, y.shape,bs)" + "sample_sentence_token" ] }, { diff 
--git a/linguify_yb/src/dataset/dataset_loader.py b/linguify_yb/src/dataset/dataset_loader.py
index 947531f8..afaa302f 100644
--- a/linguify_yb/src/dataset/dataset_loader.py
+++ b/linguify_yb/src/dataset/dataset_loader.py
@@ -42,11 +42,11 @@ def __init__(
     def _indexesfromsentence(self, sentence):
         return [self.word2index[word] for word in sentence]
 
-    def tensorfromsentence(self, sentence):
+    def sentence_to_tensor(self, sentence):
         indexes = self._indexesfromsentence(sentence)
-        return torch.tensor(indexes, dtype=torch.long)  # .view(1, -1)
+        return torch.tensor(indexes, dtype=torch.long)
 
-    def indexes_to_sentence(self, indexes_list):
+    def index_to_sentence(self, indexes_list):
         if torch.is_tensor(indexes_list):
             indexes_list = indexes_list.tolist()
         words = [self.index2word[idx] for idx in indexes_list]
diff --git a/linguify_yb/src/tests/test_data_ingestion.py b/linguify_yb/src/tests/test_data_ingestion.py
index 1a51fd08..aaa5eedf 100644
--- a/linguify_yb/src/tests/test_data_ingestion.py
+++ b/linguify_yb/src/tests/test_data_ingestion.py
@@ -6,6 +6,7 @@
 from torch.utils.data import DataLoader
 from src.dataset.frames_config import FRAME_LEN
 from src.dataset.preprocess import clean_frames_process
+from src.dataset.dataset_loader import TokenHashTable
 
 # TODO test for frames in right shapes, in tensor, frames are normalize
 # TODO test for frames dont contain NAN
@@ -18,17 +19,26 @@
     [torch.randn(num_frames, 345) for num_frames in [10, 108, 128, 156, 750, 420]],
 )
 def test_frames_preprocess(frames):
-    clean_frames = clean_frames_process(frames)
+    """doc"""
+    frames = clean_frames_process(frames)
     expected_output_shape = (128, 345)
-    assert expected_output_shape == clean_frames.shape
-
-@pytest
-def test_TokenHashTable(tokentable):
-    token_table = 
-    sample_sentences = ""
-    sample_sentences_len = len(sample_sentences)
-    sample_sentences_token = [64,]
-    tokenize_result = token_table
-    assert sample_sentences_len == len(tokenize_result)
-    assert sample_sentences_token == tokenize_result
-    
+    assert (
+        expected_output_shape == frames.shape
+    ), f"frames shape should be {expected_output_shape}"
+
+
+def test_token_hash_table():
+    token_table = TokenHashTable()
+    sample_sentence = "this is a test run"
+    sample_sentence_len = len(sample_sentence)
+    # Expected token ids for the plain sentence; sentence_to_tensor does not
+    # add the start/end tokens or padding, so neither does the expectation.
+    sample_sentence_token = torch.tensor(
+        [51, 39, 40, 50, 0, 40, 50, 0, 32, 0, 51, 36, 50, 51, 0, 49, 52, 45]
+    )
+    tokenize_result = token_table.sentence_to_tensor(sample_sentence)
+    assert sample_sentence_len == len(tokenize_result)
+    assert torch.equal(sample_sentence_token, tokenize_result)
+
+    # Check that the tokenizer output is a PyTorch tensor
+    assert torch.is_tensor(tokenize_result), "is not a PyTorch tensor"
diff --git a/linguify_yb/src/trainer.py b/linguify_yb/src/trainer.py
index c9f191c3..3634140c 100644
--- a/linguify_yb/src/trainer.py
+++ b/linguify_yb/src/trainer.py
@@ -135,7 +135,3 @@ def load_checkpoint(model, optimizer, filename):
     losses = checkpoint["loss"]
     val_losses = checkpoint["val_loss"]
     return model
-
-
-def distributed_stra_gpu():
-    pass
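
Note: the trainer above keeps the per-file dataloader calls as TODO placeholders, so the end-to-end wiring is easiest to see in a small, self-contained sketch. The code below mirrors the intended structure (one dataloader per parquet file as in main.py's TRAIN_DS_FILES, a shared mini-batch step for training and validation, a checkpoint every other epoch) and reuses the optimizer and loss settings from main.py. The ToyModel, the stand-in get_dataloader, the dummy parquet file names, and the plain print logging are illustrative assumptions only, not code from these patches.

"""Minimal sketch of the per-file training loop the trainer stubs point at.
Assumptions: toy model, fake (file, file_id) pairs, stand-in get_dataloader."""
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset


def get_dataloader(file, file_id, batch_size=32):
    # Stand-in for dataset_loader.get_dataloader: yields batches shaped like the
    # real pipeline, (batch, 128, 345) frames and (batch, 64) token ids.
    frames = torch.randn(8, 128, 345)
    phrases = torch.randint(0, 62, (8, 64))
    return DataLoader(TensorDataset(frames, phrases), batch_size=batch_size)


class ToyModel(nn.Module):
    # Tiny placeholder with the same input/output contract as the transformer:
    # (batch, 128, 345) frames in, (batch, 62, 64) per-position logits out.
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(345, 62)        # per-frame logits over 62 tokens
        self.pool = nn.AdaptiveAvgPool1d(64)  # squeeze 128 frames to 64 positions

    def forward(self, x):
        logits = self.proj(x)                      # (batch, 128, 62)
        return self.pool(logits.transpose(1, 2))   # (batch, 62, 64)


def mini_batch(model, loader, optim, loss_func, device, validation=False):
    # One pass over a dataloader; returns the mean loss for that file.
    losses = []
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        if validation:
            model.eval()
            with torch.no_grad():
                loss = loss_func(model(x), y)
        else:
            model.train()
            loss = loss_func(model(x), y)
            loss.backward()
            optim.step()
            optim.zero_grad()
        losses.append(loss.item())
    return float(np.mean(losses))


def train(model, optim, loss_func, n_epochs, batch, device, train_files, valid_files):
    model.to(device)
    val_loader = get_dataloader(*valid_files[0], batch_size=batch)
    for epoch in range(n_epochs):
        # One dataloader per parquet file, mirroring TRAIN_DS_FILES in main.py.
        train_loss = np.mean([
            mini_batch(model, get_dataloader(f, fid, batch_size=batch),
                       optim, loss_func, device)
            for f, fid in train_files
        ])
        val_loss = mini_batch(model, val_loader, optim, loss_func, device,
                              validation=True)
        if epoch % 2 == 0:  # checkpoint every other epoch
            torch.save({"epoch": epoch,
                        "model_state_dict": model.state_dict(),
                        "optimizer_state_dict": optim.state_dict()}, "model.pt")
        print(f"epoch {epoch}: train {train_loss:.4f} | val {val_loss:.4f}")


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ToyModel()
    train(model,
          torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9),
          nn.CrossEntropyLoss(label_smoothing=0.1),
          n_epochs=2, batch=4, device=device,
          train_files=[("dummy_a.parquet", 1), ("dummy_b.parquet", 2)],
          valid_files=[("dummy_c.parquet", 3)])

Swapping ToyModel for the ASLTransformer exposed by model_loader.py and the stand-in get_dataloader for dataset_loader.get_dataloader should reproduce the loop that main.py and trainer.py are building toward.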