From 05b4632d3bd80bab7ca1f1a47667c81686d1f60e Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Sat, 25 Nov 2023 03:15:26 +0100 Subject: [PATCH 01/16] [add] updates to readme --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f42449d1..9316a822 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# NSL2AUDIOO +# NSL-2-AUDIO [![LICENSE](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE) [![Python](https://img.shields.io/badge/python-3.10-blue.svg?style=flat-square)](https://www.python.org/) @@ -52,13 +52,13 @@ $ make setup $ source $(poetry env info --path)/bin/activate ``` -## Project Roadmap +### Project Roadmap Here's a glimpse of the exciting features we plan to implement in the coming weeks: - - -## Citation +- [x] Add project's documentation (you are reading it now), create issues and milestones, setup document's stub, suggest + page layout and styling. +- [] Develop a Proof of Concept System ## Acknowledgments From 2e7ac2848dd82b10a68d472e009e4f448f7df227 Mon Sep 17 00:00:00 2001 From: Ipadeola Ladipo Ezekiel Date: Tue, 28 Nov 2023 23:37:24 +0000 Subject: [PATCH 02/16] [add] updates --- linguify_yb/src/models/baseline_transfomer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/linguify_yb/src/models/baseline_transfomer.py b/linguify_yb/src/models/baseline_transfomer.py index 4a7120e3..dafcc488 100644 --- a/linguify_yb/src/models/baseline_transfomer.py +++ b/linguify_yb/src/models/baseline_transfomer.py @@ -175,5 +175,6 @@ def generate(self, source, target_start_token_idx): dec_input = torch.cat([dec_input, last_logit], dim=-1) return dec_input + def build_model(): pass From a874fd3708de027c8f82aed3bbdcd5141ba82155 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Sat, 9 Dec 2023 04:20:18 +0100 Subject: [PATCH 03/16] [add] updates --- README.md | 4 - linguify_yb/README.md | 19 +- linguify_yb/development/dev.ipynb | 340 ++---------------- linguify_yb/src/dataset/dataset.py | 13 +- linguify_yb/src/models/baseline_transfomer.py | 162 ++++++--- linguify_yb/src/train.py | 2 +- linguify_yb/tests/test_data_ingestion.py | 0 linguify_yb/tests/test_model.py | 0 linguify_yb/tests/test_pipeline.py | 0 9 files changed, 164 insertions(+), 376 deletions(-) create mode 100644 linguify_yb/tests/test_data_ingestion.py create mode 100644 linguify_yb/tests/test_model.py create mode 100644 linguify_yb/tests/test_pipeline.py diff --git a/README.md b/README.md index 9316a822..12455609 100644 --- a/README.md +++ b/README.md @@ -68,10 +68,6 @@ I would like to acknowledge the outstanding contributions of : **Email:** **GitHub:** [@tejuafonja](https://github.com/tejuafonja) -**Name:** Fola Animashaun ***(```Mentor```)*** -**Email:** -**GitHub:** [@Modinat-A](https://github.com/Modinat-A) - ## Contact **Name:** **Ipadeola Ezekiel Ladipo** diff --git a/linguify_yb/README.md b/linguify_yb/README.md index cc375adc..a5a84d35 100644 --- a/linguify_yb/README.md +++ b/linguify_yb/README.md @@ -10,4 +10,21 @@ ***Overview:*** \ -# Project Roadmap +## Project Roadmap + +- **[Month Year]:** Project Initiation +- **[Month Year]:** Core Functionality Completion +- **[Month Year]:** User Interface Design Completion +- **[Month Year]:** Data Integration Completion +- **[Month Year]:** Testing and Quality Assurance Completion +- **[Month Year]:** Deployment to Production + +## How to Contribute + +We welcome contributions from the community. 
If you're interested in contributing, please refer to the [Contributing Guidelines](CONTRIBUTING.md). + +## Support and Contact + +If you have questions or need assistance, feel free to reach out to [Your Contact Information]. + +--- diff --git a/linguify_yb/development/dev.ipynb b/linguify_yb/development/dev.ipynb index 04ed67f6..273dbbb5 100644 --- a/linguify_yb/development/dev.ipynb +++ b/linguify_yb/development/dev.ipynb @@ -63,369 +63,78 @@ "import torch\n", "from torch import nn\n", "import os\n", - "from torchprofile import profile_macs" + "#from torchprofile import profile_macs" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ - "src = torch.randn((10, 32, 5)) # (sequence_length, batch_size, input_dim)\n", - "tgt = torch.randn((20, 32, 5))" + "logits = torch.rand(64,64)" ] }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([10, 32, 5])" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "src.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "5" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "src.size()[-1]" - ] - }, - { - "cell_type": "code", - "execution_count": 9, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ - "\"\"\"doc\n", - "\"\"\"\n", - "\n", - "import torch\n", - "from torch import nn\n", - "\n", - "\n", - "class TokenEmbedding(nn.Module):\n", - " def __init__(self, number_vocab=1000, max_len=100, number_hidden=64):\n", - " super().__init__()\n", - " self.postional_embedding_layers = nn.Embedding(number_vocab, number_hidden)\n", - " self.embedding_layers = nn.Embedding(max_len, number_hidden)\n", - "\n", - " def forward(self, input_x):\n", - " max_len = input_x.size()[-1]\n", - " input_x = self.embedding_layers(input_x)\n", - " # Generate positions using torch.arange\n", - " positions = torch.arange(0, max_len)\n", - " positions = self.postional_embedding_layers(positions)\n", - " return input_x + positions\n", - "\n", - "\n", - "class LandmarkEmbedding(nn.Module):\n", - " def __init__(self, input_dim = None, number_hidden=64, max_len=100):\n", - " super().__init__()\n", - " self.conv1 = nn.Conv1d(\n", - " in_channels=input_dim,\n", - " out_channels=number_hidden,\n", - " kernel_size=11,\n", - " padding=\"same\",\n", - " stride=1,\n", - " )\n", - " self.conv2 = nn.Conv1d(\n", - " in_channels=number_hidden,\n", - " out_channels=number_hidden,\n", - " kernel_size=11,\n", - " padding=\"same\",\n", - " stride=1,\n", - " )\n", - " self.conv3 = nn.Conv1d(\n", - " in_channels=number_hidden,\n", - " out_channels=number_hidden,\n", - " kernel_size=11,\n", - " padding=\"same\",\n", - " stride=1,\n", - " )\n", - " self.postions_embedding_layers = nn.Embedding(max_len, number_hidden)\n", - " self.seq_nn = nn.Sequential(\n", - " self.conv1, nn.ReLU(), self.conv2, nn.ReLU(), self.conv3, nn.ReLU()\n", - " )\n", - "\n", - " def forward(self, input_x):\n", - " outputs = self.seq_nn(input_x)\n", - " return outputs\n", - "\n", - "\n", - "class Transformer(nn.Module):\n", - " def __init__(\n", - " self,\n", - " input_dim,\n", - " output_dim,\n", - " source_maxlen=100,\n", - " target_maxlen=100,\n", - " no_multi_heads=6,\n", - " ):\n", - " super().__init__()\n", - " num_encoder_layers = num_decoder_layers = 6\n", - " 
encoder_forward_dim = 100\n", - " # Define encoder and decoder layers\n", - " self.encoder_layer = nn.TransformerEncoderLayer(\n", - " d_model=input_dim,\n", - " nhead=no_multi_heads,\n", - " dim_feedforward=encoder_forward_dim,\n", - " activation=\"relu\",\n", - " )\n", - "\n", - " self.decoder_layer = nn.TransformerDecoderLayer(\n", - " d_model=input_dim,\n", - " nhead=no_multi_heads,\n", - " dim_feedforward=output_dim,\n", - " activation=\"relu\",\n", - " )\n", - "\n", - " # Define encoder and decoder\n", - " self.transformer_encoder = nn.TransformerEncoder(\n", - " self.encoder_layer, num_layers=num_encoder_layers\n", - " )\n", - " self.transformer_decoder = nn.TransformerDecoder(\n", - " self.decoder_layer, num_layers=num_decoder_layers\n", - " )\n", - "\n", - " # Input and output linear layers\n", - " self.input_linear = LandmarkEmbedding(input_dim=input_dim,max_len=source_maxlen)\n", - " self.target_linear = TokenEmbedding(max_len=target_maxlen)\n", - " self.num_classes = 60\n", - " self.output_linear = nn.Linear(output_dim, self.num_classes)\n", - "\n", - " def forward(self, input_x, input_y):\n", - " # Apply EMbedding\n", - " input_x = self.input_linear(input_x)\n", - "\n", - " # Transformer encoding\n", - " memory = self.transformer_encoder(input_x)\n", - "\n", - " # Apply linear layer to the target\n", - " input_y = self.target_linear(input_y)\n", - "\n", - " # Transformer decoding\n", - " output = self.transformer_decoder(input_y, memory)\n", - "\n", - " # Apply linear layer to the output\n", - " output = self.output_linear(output)\n", - "\n", - " return output\n", - "\n", - " # TODO code generate for inference\n", - " def generate(\n", - " self,\n", - " ):\n", - " pass\n" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/transformer.py:282: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.self_attn.batch_first was not True(use batch_first for better inference performance)\n", - " warnings.warn(f\"enable_nested_tensor is True, but self.use_nested_tensor is False because {why_not_sparsity_fast_path}\")\n" - ] - }, - { - "ename": "IndexError", - "evalue": "index out of range in self", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/workspace/Cohort8-Ransom-Kuti-Ladipo/linguify_yb/development/dev.ipynb Cell 7\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 14\u001b[0m tgt \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mrandn((batch_size, input_dim, input_dim))\u001b[39m.\u001b[39mlong() \u001b[39m# (sequence_length, batch_size, input_dim)\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[39m# Forward pass\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m output \u001b[39m=\u001b[39m model(src, tgt)\n\u001b[1;32m 19\u001b[0m \u001b[39m# Print the output shape\u001b[39;00m\n\u001b[1;32m 20\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mOutput shape:\u001b[39m\u001b[39m\"\u001b[39m, output\u001b[39m.\u001b[39mshape)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m 
\u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_compiled_call_impl(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 1518\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call_impl(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1523\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1524\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_pre_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1525\u001b[0m \u001b[39mor\u001b[39;00m _global_backward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1526\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1529\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1530\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", - "\u001b[1;32m/workspace/Cohort8-Ransom-Kuti-Ladipo/linguify_yb/development/dev.ipynb Cell 7\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 103\u001b[0m memory \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransformer_encoder(input_x)\n\u001b[1;32m 105\u001b[0m \u001b[39m# Apply linear layer to the target\u001b[39;00m\n\u001b[0;32m--> 106\u001b[0m input_y \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtarget_linear(input_y)\n\u001b[1;32m 108\u001b[0m \u001b[39m# Transformer decoding\u001b[39;00m\n\u001b[1;32m 109\u001b[0m output \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransformer_decoder(input_y, memory)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_compiled_call_impl(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 1518\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call_impl(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[39m# If we don't have 
any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1523\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1524\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_pre_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1525\u001b[0m \u001b[39mor\u001b[39;00m _global_backward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1526\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1529\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1530\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", - "\u001b[1;32m/workspace/Cohort8-Ransom-Kuti-Ladipo/linguify_yb/development/dev.ipynb Cell 7\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 14\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mforward\u001b[39m(\u001b[39mself\u001b[39m, input_x):\n\u001b[1;32m 15\u001b[0m max_len \u001b[39m=\u001b[39m input_x\u001b[39m.\u001b[39msize()[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n\u001b[0;32m---> 16\u001b[0m input_x \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49membedding_layers(input_x)\n\u001b[1;32m 17\u001b[0m \u001b[39m# Generate positions using torch.arange\u001b[39;00m\n\u001b[1;32m 18\u001b[0m positions \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39marange(\u001b[39m0\u001b[39m, max_len)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_compiled_call_impl(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 1518\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call_impl(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1523\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1524\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_pre_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1525\u001b[0m \u001b[39mor\u001b[39;00m _global_backward_pre_hooks \u001b[39mor\u001b[39;00m 
_global_backward_hooks\n\u001b[1;32m 1526\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1529\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1530\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/modules/sparse.py:162\u001b[0m, in \u001b[0;36mEmbedding.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mforward\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39minput\u001b[39m: Tensor) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tensor:\n\u001b[0;32m--> 162\u001b[0m \u001b[39mreturn\u001b[39;00m F\u001b[39m.\u001b[39;49membedding(\n\u001b[1;32m 163\u001b[0m \u001b[39minput\u001b[39;49m, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mweight, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpadding_idx, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmax_norm,\n\u001b[1;32m 164\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mnorm_type, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mscale_grad_by_freq, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msparse)\n", - "File \u001b[0;32m/workspace/.pyenv_mirror/user/current/lib/python3.10/site-packages/torch/nn/functional.py:2233\u001b[0m, in \u001b[0;36membedding\u001b[0;34m(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)\u001b[0m\n\u001b[1;32m 2227\u001b[0m \u001b[39m# Note [embedding_renorm set_grad_enabled]\u001b[39;00m\n\u001b[1;32m 2228\u001b[0m \u001b[39m# XXX: equivalent to\u001b[39;00m\n\u001b[1;32m 2229\u001b[0m \u001b[39m# with torch.no_grad():\u001b[39;00m\n\u001b[1;32m 2230\u001b[0m \u001b[39m# torch.embedding_renorm_\u001b[39;00m\n\u001b[1;32m 2231\u001b[0m \u001b[39m# remove once script supports set_grad_enabled\u001b[39;00m\n\u001b[1;32m 2232\u001b[0m _no_grad_embedding_renorm_(weight, \u001b[39minput\u001b[39m, max_norm, norm_type)\n\u001b[0;32m-> 2233\u001b[0m \u001b[39mreturn\u001b[39;00m torch\u001b[39m.\u001b[39;49membedding(weight, \u001b[39minput\u001b[39;49m, padding_idx, scale_grad_by_freq, sparse)\n", - "\u001b[0;31mIndexError\u001b[0m: index out of range in self" - ] - } - ], - "source": [ - "#RuntimeError: Given groups=1, weight of size [11, 64, 2], \n", - "#expected input[100, 32, 513] to have 64 channels, but got 32 channels instead\n", - "# Example usage:\n", - "input_dim = 513 # Adjust based on your input dimension\n", - "output_dim = 256 # Adjust based on your output dimension\n", - "nhead = 9\n", - "batch_size = 16\n", - "sequnce = 100\n", - "# Instantiate the model\n", - "model = Transformer(input_dim, output_dim,no_multi_heads=nhead)\n", - "\n", - "# Create dummy input\n", - "src = torch.randn((batch_size, input_dim, input_dim)) # (sequence_length, batch_size, input_dim)\n", - "tgt = torch.randn((batch_size, input_dim, input_dim)).long() # (sequence_length, batch_size, input_dim)\n", - "\n", - "# Forward pass\n", - "output = model(src, tgt)\n", - "\n", - "# Print the output shape\n", - "print(\"Output shape:\", output.shape)" + "dec_logits = []\n", + "dec_input = (torch.ones((1), dtype=torch.long)* 60)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "No 
error, the configuration is valid.\n" - ] - } - ], - "source": [ - "input_dim = 513\n", - "num_heads = 9\n", - "\n", - "# Check if embed_dim is divisible by num_heads\n", - "if input_dim % num_heads != 0:\n", - " print(\"Error: embed_dim must be divisible by num_heads\")\n", - "else:\n", - " print(\"No error, the configuration is valid.\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", - " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,\n", - " 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,\n", - " 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,\n", - " 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,\n", - " 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "positions = torch.arange(0, 100)\n", - "positions" - ] - }, - { - "cell_type": "code", - "execution_count": 26, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ - "par = ['filepat','file2', 'file3']\n", - "id = [1,2,3,]\n", - "assert len(id)== len(par), 'failed'" + "logits = torch.argmax(logits, dim=-1, keepdim=True) \n", + "last_logit = logits[:, -1]\n", + "dec_logits.append(last_logit)\n", + "dec_input = torch.cat([dec_input, last_logit], dim=-1)" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[('filepat', 1), ('file2', 2), ('file3', 3)]" + "torch.Size([64])" ] }, - "execution_count": 27, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ans = list(zip(par,id))\n", - "ans" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "filepat\n", - "1\n", - "file2\n", - "2\n", - "file3\n", - "3\n" - ] - } - ], - "source": [ - "for nu , id in ans:\n", - " print(nu)\n", - " print(id)" + "last_logit.shape" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'f'" + "torch.Size([65])" ] }, - "execution_count": 30, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "nu[0]" + "dec_input.shape" ] }, { @@ -433,7 +142,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "for idx in preds[i, :]:\n", + " prediction += self.idx_to_char[idx]\n", + " if idx == 60:\n", + " break\n", + "print(f\"target: {target_text.replace('-','')}\")\n", + "print(f\"prediction: {prediction}\\n\")" + ] } ], "metadata": { @@ -452,7 +168,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.2" } }, "nbformat": 4, diff --git a/linguify_yb/src/dataset/dataset.py b/linguify_yb/src/dataset/dataset.py index 2f31f4ce..e3b85bd3 100644 --- a/linguify_yb/src/dataset/dataset.py +++ b/linguify_yb/src/dataset/dataset.py @@ -10,8 +10,7 @@ from torch.nn import functional as F from torch.utils.data import DataLoader, Dataset -from linguify_yb.src.dataset.frames_config import (FEATURE_COLUMNS, LHAND_IDX, - RHAND_IDX) +from linguify_yb.src.dataset.frames_config import FEATURE_COLUMNS, LHAND_IDX, RHAND_IDX from linguify_yb.src.dataset.preprocess 
import frames_preprocess PHRASE_PATH = "data/asl-fingerspelling/character_to_prediction_index.json" @@ -94,7 +93,7 @@ def __len__(self): def __getitem__(self, idx): if torch.is_tensor(idx): idx = idx.tolist() - phrase = self.labels[idx] + phrase = self.labels[idx] frames = self.frames[idx] if self.trans: @@ -102,6 +101,7 @@ def __getitem__(self, idx): frames = frames_preprocess(frames) return frames, phrase + def pack_collate_func(batch): frames_feature = [item[0] for item in batch] phrase = [item[1] for item in batch] @@ -121,9 +121,9 @@ def get_dataloader(file_path, file_id, batch_size): return dataloader - # For Debugging Train Pipeline + class TestDataset(Dataset): def __init__(self, num_samples=1000, input_size=10): self.num_samples = num_samples @@ -137,6 +137,9 @@ def __len__(self): def __getitem__(self, idx): return self.data[idx], self.labels[idx] + # Generating a dataset with 1000 samples and 10 input features testdataset = TestDataset(num_samples=1000, input_size=10) -TEST_LOADER = DataLoader(dataset=testdataset, batch_size=1, num_workers=2, pin_memory= True) \ No newline at end of file +TEST_LOADER = DataLoader( + dataset=testdataset, batch_size=1, num_workers=2, pin_memory=True +) diff --git a/linguify_yb/src/models/baseline_transfomer.py b/linguify_yb/src/models/baseline_transfomer.py index dafcc488..543980cc 100644 --- a/linguify_yb/src/models/baseline_transfomer.py +++ b/linguify_yb/src/models/baseline_transfomer.py @@ -2,13 +2,13 @@ """ import torch -import torch.nn as nn +from torch import nn import torch.nn.functional as F class TokenEmbedding(nn.Module): def __init__(self, num_vocab=1000, maxlen=100, num_hid=64): - super(TokenEmbedding, self).__init__() + super().__init__() self.emb = nn.Embedding(num_vocab, num_hid) self.pos_emb = nn.Embedding(maxlen, num_hid) @@ -22,21 +22,59 @@ def forward(self, x): class LandmarkEmbedding(nn.Module): def __init__(self, num_hid=64, maxlen=100): - super(LandmarkEmbedding, self).__init__() - self.conv1 = nn.Conv1d(num_hid, 11, stride=2, padding="same") - self.conv2 = nn.Conv1d(num_hid, 11, stride=2, padding="same") - self.conv3 = nn.Conv1d(num_hid, 11, stride=2, padding="same") + super().__init__() + # Calculate the padding for "same" padding + self.padding = (11 - 1) // 2 + self.output_embedding_dim = num_hid + self.conv1 = nn.Conv1d( + in_channels=1, + out_channels=64, + kernel_size=11, + stride=1, + padding=self.padding, + ) + self.conv2 = nn.Conv1d( + in_channels=64, + out_channels=128, + kernel_size=11, + stride=1, + padding=self.padding, + ) + self.conv3 = nn.Conv1d( + in_channels=128, + out_channels=256, + kernel_size=11, + stride=1, + padding=self.padding, + ) self.pos_emb = nn.Embedding(maxlen, num_hid) + self.embedding_layer = nn.Linear(256 * 345, self.output_embedding_dim) def forward(self, x): + # Input x should have shape (batch_size, input_size) + x = x.unsqueeze(1) # Add a channel dimension for 1D convolution x = F.relu(self.conv1(x)) x = F.relu(self.conv2(x)) - return F.relu(self.conv3(x)) + x = F.relu(self.conv3(x)) + + # Flatten the output before passing through the linear embedding layer + x = x.view(x.size(0), -1) + + # Apply the linear embedding layer + x = self.embedding_layer(x) + + return x class TransformerEncoder(nn.Module): - def __init__(self, embed_dim, num_heads, feed_forward_dim, rate=0.1): - super(TransformerEncoder, self).__init__() + def __init__( + self, + embed_dim, + num_heads, + feed_forward_dim, + rate=0.1, + ): + super().__init__() self.att = nn.MultiheadAttention(embed_dim, num_heads) self.ffn = 
nn.Sequential( nn.Linear(embed_dim, feed_forward_dim), @@ -49,7 +87,7 @@ def __init__(self, embed_dim, num_heads, feed_forward_dim, rate=0.1): self.dropout1 = nn.Dropout(rate) self.dropout2 = nn.Dropout(rate) - def forward(self, inputs, training): + def forward(self, inputs): attn_out, _ = self.att(inputs, inputs, inputs) attn_out = self.dropout1(attn_out) out1 = self.layernorm1(inputs + attn_out) @@ -61,7 +99,7 @@ def forward(self, inputs, training): class TransformerDecoder(nn.Module): def __init__(self, embed_dim, num_heads, feed_forward_dim, dropout_rate=0.1): - super(TransformerDecoder, self).__init__() + super().__init__() self.layernorm1 = nn.LayerNorm(embed_dim, eps=1e-6) self.layernorm2 = nn.LayerNorm(embed_dim, eps=1e-6) self.layernorm3 = nn.LayerNorm(embed_dim, eps=1e-6) @@ -76,29 +114,32 @@ def __init__(self, embed_dim, num_heads, feed_forward_dim, dropout_rate=0.1): nn.Linear(feed_forward_dim, embed_dim), ) - def causal_attention_mask(self, batch_size, n_dest, n_src, dtype): - i = torch.arange(n_dest)[:, None] - j = torch.arange(n_src) - m = i >= j - n_src + n_dest - mask = m.to(dtype) - mask = mask.view(1, n_dest, n_src) - mult = torch.cat( - [batch_size[..., None], torch.tensor([1, 1], dtype=torch.int32)], 0 + def causal_attention_mask( + self, sequence_length, batch_size=1, num_heads=8, device="cpu" + ): + mask = torch.triu(torch.ones(sequence_length, sequence_length), diagonal=1).to( + device ) - return mask.repeat(mult) - - def forward(self, enc_out, target, training): - input_shape = target.size() - batch_size = input_shape[0] - seq_len = input_shape[1] - causal_mask = self.causal_attention_mask( - batch_size, seq_len, seq_len, torch.bool + mask = mask.unsqueeze(0).expand( + batch_size * num_heads, sequence_length, sequence_length ) + return mask - target_att = self.self_att(target, target, target, attn_mask=causal_mask) - target_norm = self.layernorm1(target + self.self_dropout(target_att)) + def forward( + self, + src_target_, + enc_out, + ): + input_shape = src_target_.size() + batch_size = 1 # input_shape[0] + seq_len = input_shape[0] + mask = self.causal_attention_mask(seq_len, batch_size=batch_size) + target_att, _ = self.self_att( + src_target_, src_target_, src_target_, attn_mask=mask + ) + target_norm = self.layernorm1(src_target_ + self.self_dropout(target_att)) - enc_out = self.enc_att(target_norm, enc_out, enc_out) + enc_out, _ = self.enc_att(target_norm, enc_out, enc_out) enc_out_norm = self.layernorm2(enc_out + self.enc_dropout(enc_out)) ffn_out = self.ffn(enc_out_norm) @@ -107,27 +148,26 @@ def forward(self, enc_out, target, training): return ffn_out_norm -class Transformer(nn.Module): +class NTransformer(nn.Module): def __init__( self, num_hid=64, - num_head=2, + num_head=8, num_feed_forward=128, source_maxlen=100, target_maxlen=100, num_layers_enc=4, num_layers_dec=1, - num_classes=60, ): - super(Transformer, self).__init__() + super().__init__() self.num_layers_enc = num_layers_enc self.num_layers_dec = num_layers_dec self.target_maxlen = target_maxlen - self.num_classes = num_classes + self.num_classes = 64 self.enc_input = LandmarkEmbedding(num_hid=num_hid, maxlen=source_maxlen) self.dec_input = TokenEmbedding( - num_vocab=num_classes, maxlen=target_maxlen, num_hid=num_hid + num_vocab=64, ) self.encoder = nn.Sequential( @@ -144,37 +184,53 @@ def __init__( TransformerDecoder(num_hid, num_head, num_feed_forward), ) - self.classifier = nn.Linear(num_hid, num_classes) - - def decode(self, enc_out, target, training): - y = self.dec_input(target) - for i in 
range(self.num_layers_dec): - y = getattr(self, f"dec_layer_{i}")(enc_out, y, training) - return y + self.classifier = nn.Linear(num_hid, self.num_classes) - def forward(self, inputs, training): + def forward(self, inputs): source, target = inputs x = self.encoder(source) - y = self.decode(x, target, training) + y = self.decode(x, target) return self.classifier(y) - def generate(self, source, target_start_token_idx): + def decode(self, enc_out, target): + y = self.dec_input(target) + for i in range(self.num_layers_dec): + y = getattr(self, f"dec_layer_{i}")( + enc_out, + y, + ) + return y + + def generate(self, source, target_start_token_idx=60): + """Performs inference over one batch of inputs using greedy decoding + + Parameters + ---------- + source : _type_ + _description_ + target_start_token_idx : _type_ + _description_ + + Returns + ------- + _type_ + _description_ + """ bs = source.size(0) enc = self.encoder(source) dec_input = ( - torch.ones((bs, 1), dtype=torch.long).to(source.device) - * target_start_token_idx + torch.ones((1), dtype=torch.long).to(source.device) * target_start_token_idx ) dec_logits = [] + counter = 0 for i in range(self.target_maxlen - 1): - dec_out = self.decode(enc, dec_input, training=False) + dec_out = self.decode(enc, dec_input) logits = self.classifier(dec_out) logits = torch.argmax(logits, dim=-1, keepdim=True) last_logit = logits[:, -1] dec_logits.append(last_logit) dec_input = torch.cat([dec_input, last_logit], dim=-1) + counter += 1 + if counter > 2: + break return dec_input - - -def build_model(): - pass diff --git a/linguify_yb/src/train.py b/linguify_yb/src/train.py index b614e206..cdec6f38 100644 --- a/linguify_yb/src/train.py +++ b/linguify_yb/src/train.py @@ -6,7 +6,7 @@ # --epochs 10 \ # --batch 512 \ """ -# TODO Complete and refactor code +# TODO Complete and refactor code for distributed training import os import json diff --git a/linguify_yb/tests/test_data_ingestion.py b/linguify_yb/tests/test_data_ingestion.py new file mode 100644 index 00000000..e69de29b diff --git a/linguify_yb/tests/test_model.py b/linguify_yb/tests/test_model.py new file mode 100644 index 00000000..e69de29b diff --git a/linguify_yb/tests/test_pipeline.py b/linguify_yb/tests/test_pipeline.py new file mode 100644 index 00000000..e69de29b From 2316deda7afdace08a53fd5cd3f56c4dd054604c Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Sun, 10 Dec 2023 21:08:28 +0100 Subject: [PATCH 04/16] [add] updates --- ..._transfomer.py => baseline_transformer.py} | 84 ++++++++----------- 1 file changed, 37 insertions(+), 47 deletions(-) rename linguify_yb/src/models/{baseline_transfomer.py => baseline_transformer.py} (78%) diff --git a/linguify_yb/src/models/baseline_transfomer.py b/linguify_yb/src/models/baseline_transformer.py similarity index 78% rename from linguify_yb/src/models/baseline_transfomer.py rename to linguify_yb/src/models/baseline_transformer.py index 543980cc..02d76856 100644 --- a/linguify_yb/src/models/baseline_transfomer.py +++ b/linguify_yb/src/models/baseline_transformer.py @@ -3,11 +3,10 @@ import torch from torch import nn -import torch.nn.functional as F class TokenEmbedding(nn.Module): - def __init__(self, num_vocab=1000, maxlen=100, num_hid=64): + def __init__(self, num_vocab=1000, maxlen=100, num_hid=200): super().__init__() self.emb = nn.Embedding(num_vocab, num_hid) self.pos_emb = nn.Embedding(maxlen, num_hid) @@ -24,41 +23,33 @@ class LandmarkEmbedding(nn.Module): def __init__(self, num_hid=64, maxlen=100): super().__init__() # Calculate the padding for 
"same" padding - self.padding = (11 - 1) // 2 - self.output_embedding_dim = num_hid + padding = (11 - 1) // 2 + + # Define three 1D convolutional layers with ReLU activation and stride 2 self.conv1 = nn.Conv1d( - in_channels=1, - out_channels=64, - kernel_size=11, - stride=1, - padding=self.padding, + in_channels=1, out_channels=64, kernel_size=11, stride=2, padding=padding ) self.conv2 = nn.Conv1d( - in_channels=64, - out_channels=128, - kernel_size=11, - stride=1, - padding=self.padding, + in_channels=64, out_channels=128, kernel_size=11, stride=2, padding=padding ) self.conv3 = nn.Conv1d( - in_channels=128, - out_channels=256, - kernel_size=11, - stride=1, - padding=self.padding, + in_channels=128, out_channels=256, kernel_size=11, stride=2, padding=padding ) - self.pos_emb = nn.Embedding(maxlen, num_hid) - self.embedding_layer = nn.Linear(256 * 345, self.output_embedding_dim) + + # Output embedding layer + self.embedding_layer = nn.Linear(256, num_hid) def forward(self, x): # Input x should have shape (batch_size, input_size) x = x.unsqueeze(1) # Add a channel dimension for 1D convolution - x = F.relu(self.conv1(x)) - x = F.relu(self.conv2(x)) - x = F.relu(self.conv3(x)) - # Flatten the output before passing through the linear embedding layer - x = x.view(x.size(0), -1) + # Apply convolutional layers with ReLU activation and stride 2 + x = torch.relu(self.conv1(x)) + x = torch.relu(self.conv2(x)) + x = torch.relu(self.conv3(x)) + + # Global average pooling to reduce spatial dimensions + x = torch.mean(x, dim=2) # Apply the linear embedding layer x = self.embedding_layer(x) @@ -94,7 +85,9 @@ def forward(self, inputs): ffn_out = self.ffn(out1) ffn_out = self.dropout2(ffn_out) - return self.layernorm2(out1 + ffn_out) + x = self.layernorm2(out1 + ffn_out) + print(f"endocder{x.shape}") + return x class TransformerDecoder(nn.Module): @@ -115,7 +108,7 @@ def __init__(self, embed_dim, num_heads, feed_forward_dim, dropout_rate=0.1): ) def causal_attention_mask( - self, sequence_length, batch_size=1, num_heads=8, device="cpu" + self, sequence_length, batch_size=1, num_heads=4, device="cpu" ): mask = torch.triu(torch.ones(sequence_length, sequence_length), diagonal=1).to( device @@ -127,8 +120,8 @@ def causal_attention_mask( def forward( self, - src_target_, enc_out, + src_target_, ): input_shape = src_target_.size() batch_size = 1 # input_shape[0] @@ -144,7 +137,7 @@ def forward( ffn_out = self.ffn(enc_out_norm) ffn_out_norm = self.layernorm3(enc_out_norm + self.ffn_dropout(ffn_out)) - + print(f"decoder - {ffn_out_norm.shape}") return ffn_out_norm @@ -157,17 +150,17 @@ def __init__( source_maxlen=100, target_maxlen=100, num_layers_enc=4, - num_layers_dec=1, + num_layers_dec=4, ): super().__init__() self.num_layers_enc = num_layers_enc self.num_layers_dec = num_layers_dec self.target_maxlen = target_maxlen - self.num_classes = 64 + self.num_classes = 62 self.enc_input = LandmarkEmbedding(num_hid=num_hid, maxlen=source_maxlen) self.dec_input = TokenEmbedding( - num_vocab=64, + num_vocab=self.num_classes, maxlen=target_maxlen ) self.encoder = nn.Sequential( @@ -186,19 +179,19 @@ def __init__( self.classifier = nn.Linear(num_hid, self.num_classes) - def forward(self, inputs): - source, target = inputs + def forward(self, source, target): x = self.encoder(source) - y = self.decode(x, target) + y = self.decoder_run(x, target) + print(y.shape) return self.classifier(y) - def decode(self, enc_out, target): + def decoder_run(self, enc_out, target): + print(f"before emb {target.shape}") y = 
self.dec_input(target) + print(f"after emb {y.shape}") + for i in range(self.num_layers_dec): - y = getattr(self, f"dec_layer_{i}")( - enc_out, - y, - ) + y = getattr(self, f"dec_layer_{i}")(enc_out, y) return y def generate(self, source, target_start_token_idx=60): @@ -222,15 +215,12 @@ def generate(self, source, target_start_token_idx=60): torch.ones((1), dtype=torch.long).to(source.device) * target_start_token_idx ) dec_logits = [] - counter = 0 for i in range(self.target_maxlen - 1): - dec_out = self.decode(enc, dec_input) + dec_out = self.decoder_run(enc, dec_input) logits = self.classifier(dec_out) + logits = torch.argmax(logits, dim=-1, keepdim=True) - last_logit = logits[:, -1] + last_logit = logits[-1] dec_logits.append(last_logit) dec_input = torch.cat([dec_input, last_logit], dim=-1) - counter += 1 - if counter > 2: - break return dec_input From 4a3fd6546e7a0698706db4f70a3a3cb64af9d01a Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 02:45:15 +0100 Subject: [PATCH 05/16] [add] mics --- linguify_yb/src/dataset/__init__.py | 0 linguify_yb/src/dataset/dataset.py | 145 -------------- linguify_yb/src/dataset/subsample_data.py | 124 ------------ linguify_yb/{ => src/tests}/__init__.py | 0 linguify_yb/src/train.py | 221 ---------------------- linguify_yb/src/utils/__init__.py | 74 -------- linguify_yb/src/utils/benchmark,py | 50 ----- linguify_yb/tests/test_data_ingestion.py | 0 linguify_yb/tests/test_model.py | 0 linguify_yb/tests/test_pipeline.py | 0 10 files changed, 614 deletions(-) delete mode 100644 linguify_yb/src/dataset/__init__.py delete mode 100644 linguify_yb/src/dataset/dataset.py delete mode 100644 linguify_yb/src/dataset/subsample_data.py rename linguify_yb/{ => src/tests}/__init__.py (100%) delete mode 100644 linguify_yb/src/train.py delete mode 100644 linguify_yb/src/utils/__init__.py delete mode 100644 linguify_yb/src/utils/benchmark,py delete mode 100644 linguify_yb/tests/test_data_ingestion.py delete mode 100644 linguify_yb/tests/test_model.py delete mode 100644 linguify_yb/tests/test_pipeline.py diff --git a/linguify_yb/src/dataset/__init__.py b/linguify_yb/src/dataset/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/linguify_yb/src/dataset/dataset.py b/linguify_yb/src/dataset/dataset.py deleted file mode 100644 index e3b85bd3..00000000 --- a/linguify_yb/src/dataset/dataset.py +++ /dev/null @@ -1,145 +0,0 @@ -"""doc -""" - -import json - -import numpy as np -import pandas as pd -import pyarrow.parquet as pq -import torch -from torch.nn import functional as F -from torch.utils.data import DataLoader, Dataset - -from linguify_yb.src.dataset.frames_config import FEATURE_COLUMNS, LHAND_IDX, RHAND_IDX -from linguify_yb.src.dataset.preprocess import frames_preprocess - -PHRASE_PATH = "data/asl-fingerspelling/character_to_prediction_index.json" -METADATA = "data/asl-fingerspelling/train.csv" - -with open(PHRASE_PATH, "r", encoding="utf-8") as f: - character_to_num = json.load(f) - -PAD_TOKEN = "P" -START_TOKEN = "<" -END_TOKEN = ">" -PAD_TOKEN_IDX = 59 -START_TOKEN_IDX = 60 -END_TOKEN_IDX = 61 - -character_to_num[PAD_TOKEN] = PAD_TOKEN_IDX -character_to_num[START_TOKEN] = START_TOKEN_IDX -character_to_num[END_TOKEN] = END_TOKEN_IDX -num_to_character = {j: i for i, j in character_to_num.items()} - - -class TokenHashTable: - def __init__(self, word2index_mapping, index2word_mapping): - self.word2index = word2index_mapping - self.index2word = index2word_mapping - - def _indexesfromsentence(self, sentence): - return 
[self.word2index[word] for word in sentence] - - def tensorfromsentence(self, sentence): - indexes = self._indexesfromsentence(sentence) - return torch.tensor(indexes, dtype=torch.long) # .view(1, -1) - - -def read_file(file, file_id, landmarks_metadata_path): - phrase_list = [] - frames_list = [] - metadata_train_dataframe = pd.read_csv(landmarks_metadata_path) - file_id_df = metadata_train_dataframe.loc[ - metadata_train_dataframe["file_id"] == file_id - ] - saved_parueat_df = pq.read_table( - file, columns=["sequence_id"] + FEATURE_COLUMNS - ).to_pandas() - for seq_id, phrase in zip(file_id_df.sequence_id, file_id_df.phrase): - frames = saved_parueat_df[saved_parueat_df.index == seq_id].to_numpy() - # NaN - right_num_nan = np.sum(np.sum(np.isnan(frames[:, RHAND_IDX]), axis=1) == 0) - left_num_nan = np.sum(np.sum(np.isnan(frames[:, LHAND_IDX]), axis=1) == 0) - total_num_nan = max(right_num_nan, left_num_nan) - if 2 * len(phrase) < total_num_nan: - frames_list.append(frames) - phrase_list.append(phrase) - return (frames_list, phrase_list) - - -class LandmarkDataset(Dataset): - def __init__(self, file_path, file_id, table, transform=True): - self.landmarks_metadata_path = METADATA - self.frames, self.labels = read_file( - file_path, file_id, self.landmarks_metadata_path - ) - self.trans = transform - self.table = table - - def _label_pre(self, label_sample): - sample = START_TOKEN + label_sample + END_TOKEN - new_phrase = self.table.tensorfromsentence(list(sample)) - ans = F.pad( - input=new_phrase, - pad=[0, 64 - new_phrase.shape[0]], - mode="constant", - value=PAD_TOKEN_IDX, - ) - return ans - - def __len__(self): - return len(self.labels) - - def __getitem__(self, idx): - if torch.is_tensor(idx): - idx = idx.tolist() - phrase = self.labels[idx] - frames = self.frames[idx] - - if self.trans: - phrase = self._label_pre(phrase) - frames = frames_preprocess(frames) - return frames, phrase - - -def pack_collate_func(batch): - frames_feature = [item[0] for item in batch] - phrase = [item[1] for item in batch] - return [frames_feature, phrase] - - -def get_dataloader(file_path, file_id, batch_size): - lookup_table = TokenHashTable(character_to_num, num_to_character) - dataset = LandmarkDataset(file_path, file_id, lookup_table, transform=True) - - dataloader = DataLoader( - dataset, - batch_size=batch_size, - num_workers=2, - pin_memory=True, - ) - return dataloader - - -# For Debugging Train Pipeline - - -class TestDataset(Dataset): - def __init__(self, num_samples=1000, input_size=10): - self.num_samples = num_samples - self.input_size = input_size - self.data = torch.randn(num_samples, input_size) - self.labels = torch.randint(0, 2, (num_samples,)) - - def __len__(self): - return self.num_samples - - def __getitem__(self, idx): - return self.data[idx], self.labels[idx] - - -# Generating a dataset with 1000 samples and 10 input features -testdataset = TestDataset(num_samples=1000, input_size=10) -TEST_LOADER = DataLoader( - dataset=testdataset, batch_size=1, num_workers=2, pin_memory=True -) diff --git a/linguify_yb/src/dataset/subsample_data.py b/linguify_yb/src/dataset/subsample_data.py deleted file mode 100644 index 3935c2b1..00000000 --- a/linguify_yb/src/dataset/subsample_data.py +++ /dev/null @@ -1,124 +0,0 @@ -"""Dataset Download Module - -This module provides functions to download the a subsample of Google ASL dataset. - -Functions: -- download_dataset(url: str, destination: str, path): - Downloads a dataset from the given URL to the specified destination directory. 
-- main - the main function to run the script -""" - - -import os -import shutil -import subprocess -import zipfile - -from linguify_yb.src.utils.logger_util import logger - -DATA_DIR = "data/asl-fingerspelling/" -data_files = ["train.csv", "character_to_prediction_index.json"] -train_landmarks = ["1019715464.parquet", "1021040628.parquet", "105143404.parquet"] -TRAIN_LANDMARKS_DIR = "train_landmarks/" - -COMMAND = [ - "kaggle", - "competitions", - "download", - "-c", - "asl-fingerspelling", - "-f", - "FILE", - "-p", - "data/raw/", -] - - -def check_storage(project_dir=os.getcwd()): - """check and return availabe storage space - - Parameters - ---------- - directory_path : str, Path - current working directory/directory path - - Returns - ------- - int - the size of available storage space (GB) - - Raises - ------ - StorageFullError - exception for when storage is full. - """ - total, used, free = shutil.disk_usage(project_dir) - total_size_gb = round(total / (2**30), 2) - used_size_gb = round(used / (2**30), 2) - free_size_gb = round(free / (2**30), 2) - if used_size_gb / total_size_gb >= 0.8: - raise StorageFullError - return free_size_gb - - -class StorageFullError(Exception): - """Custom exception for when storage is full.""" - - pass - - -def downlaod_file(cmd, unzipped_file_path, data_dir): - """Download file using kaggle API - - Parameters - ---------- - cmd : list - Kaggle API Commands - unzipped_file : str, Path - path of the unzipped file - data_dir : str, Path - the directory where the data should be downloaded into - """ - subprocess.run(cmd, check=True, text=True) - if ( - os.path.exists(unzipped_file_path) - and os.path.splitext(unzipped_file_path)[1].lower() == ".zip" - ): - # Unzipping and delete the zipped file to free storage - with zipfile.ZipFile(unzipped_file_path, "r") as zip_ref: - zip_ref.extractall(data_dir) - os.remove(unzipped_file_path) - else: - pass - - -def main(): - """the main function to run the script""" - logger.info("Commencing downloading the dataset") - try: - logger.info(f"Current Available space {check_storage()}GB") - for file in data_files: - logger.info(f"Downloading{file} in {DATA_DIR}") - COMMAND[6] = file - unzipfile_path = DATA_DIR + file + ".zip" - downlaod_file(COMMAND, unzipfile_path, DATA_DIR) - logger.info(f" {file} downloaded succesful") - # Downloading the LANDMARKS files - for parquet_file in train_landmarks: - logger.info(f"Current Available space {check_storage()}GB") - file_path = TRAIN_LANDMARKS_DIR + parquet_file - COMMAND[6] = file_path - COMMAND[8] = DATA_DIR + TRAIN_LANDMARKS_DIR - unzipfile_path = DATA_DIR + file_path + ".zip" - downlaod_file(COMMAND, unzipfile_path, DATA_DIR + TRAIN_LANDMARKS_DIR) - logger.info(f"{parquet_file} downloaded succesfully") - - logger.success("All files downloaded succesfully") - - except Exception as error: - logger.error(f"failed due to {error}") - logger.exception("Data unloading was unsuccesfully") - - -if __name__ == "__main__": - main() diff --git a/linguify_yb/__init__.py b/linguify_yb/src/tests/__init__.py similarity index 100% rename from linguify_yb/__init__.py rename to linguify_yb/src/tests/__init__.py diff --git a/linguify_yb/src/train.py b/linguify_yb/src/train.py deleted file mode 100644 index cdec6f38..00000000 --- a/linguify_yb/src/train.py +++ /dev/null @@ -1,221 +0,0 @@ -""" -doc - -# Usage: -# python -m src/train.py \ -# --epochs 10 \ -# --batch 512 \ -""" -# TODO Complete and refactor code for distributed training - -import os -import json - -import numpy as np -import torch 
-import wandb -from torch import nn - -from linguify_yb.src.dataset.dataset import get_dataloader, TEST_LOADER -from linguify_yb.src.models.model_loader import ModelLoader -from linguify_yb.src.utils import get_device_strategy, parse_args, set_seed -from linguify_yb.src.utils.logger_util import logger - - -try: - dataset_paths = "dev_samples.json" # On kaggle replace with "dataset_paths.json" to train on full data - with open(dataset_paths, "r", encoding="utf-8") as json_file: - data_dict = json.load(json_file) - LANDMARK_DIR = "/kaggle/input/asl-fingerspelling/train_landmarks" - MODEL_DIR = "model.pt" - - # Training dataset - train_dataset = data_dict["train_files"] - train_file_ids = [os.path.basename(file) for file in train_dataset] - train_file_ids = [ - int(file_name.replace(".parquet", "")) for file_name in train_file_ids - ] - assert len(train_dataset) == len( - train_file_ids - ), "Failed import of Train files path " - TRAIN_DS_FILES = list(zip(train_dataset, train_file_ids)) - - # Validation dataset - valid_dataset = data_dict["valid_files"] - valid_file_ids = [os.path.basename(file) for file in valid_dataset] - valid_file_ids = [ - int(file_name.replace(".parquet", "")) for file_name in valid_file_ids - ] - assert len(train_dataset) == len( - train_file_ids - ), "Failed Import of Valid Files path" - VALID_DS_FILES = list(zip(valid_dataset, valid_file_ids)) -except AssertionError as asset_error: - logger.exception(f"failed {asset_error}") - - -def train(model, optim, loss_func, n_epochs, batch, device): - # To ensure reproducibility of the training process - set_seed() - train_losses = [] - val_losses = [] - val_dataloader = TEST_LOADER # get_dataloader(TRAIN_FILES[0][0], TRAIN_FILES[0][1], batch_size=batch) - - for epoch in range(n_epochs): - logger.info(f"Training on epoch {epoch}.") - total_epochs = epoch - file_train_loss = [] - for file, file_id in TRAIN_DS_FILES: - train_dataloader = ( - TEST_LOADER # get_dataloader(file, file_id, batch_size=batch) - ) - - # Performs training using mini-batches - train_loss = mini_batch( - model, train_dataloader, optim, loss_func, device, validation=False - ) - file_train_loss.append(train_loss) - train_loss = np.mean(file_train_loss) - train_losses.append(train_loss) - - # Performs evaluation using mini-batches - logger.info("Starting validation.") - with torch.no_grad(): - val_loss = mini_batch( - model, val_dataloader, optim, loss_func, device, validation=True - ) - val_losses.append(val_loss) - - wandb.log( - { - "train_loss": train_loss, - "val_loss": val_loss, - "epoch": epoch, - } - ) - - if epoch // 2 == 0: - logger.info("Initiating checkpoint. 
Saving model and optimizer states.") - save_checkpoint( - MODEL_DIR, model, optim, total_epochs, train_losses, val_losses - ) - - -def mini_batch( - model, dataloader, mini_batch_optim, loss_func, device, validation=False -): - # The mini-batch can be used with both loaders - # The argument `validation`defines which loader and - # corresponding step function is going to be used - if validation: - step_func = val_step_func(model, loss_func) - else: - step_func = train_step_func(model, mini_batch_optim, loss_func) - - # Once the data loader and step function, this is the same - # mini-batch loop we had before - mini_batch_losses = [] - for x_batch, y_batch in dataloader: - x_batch = x_batch.to(device) - y_batch = y_batch.to(device) - loss = step_func(x=x_batch, y=y_batch) - mini_batch_losses.append(loss) - loss = np.mean(mini_batch_losses) - return loss - - -def train_step_func(model, optim_, loss_func): - def perform_train_step_fn(x, y): - model.train() - preds = model(x) - loss = loss_func(preds, y) - loss.backward() - optim_.step() - optim_.zero_grad() - return loss.item() - - return perform_train_step_fn - - -def val_step_func(model, loss_func): - def perform_val_step_fn(x, y): - model.eval() - preds = model(x) - loss = loss_func(preds, y) - return loss.item() - - return perform_val_step_fn - - -def save_checkpoint(filename, model, optimizer, total_epochs, train_losses, val_losses): - # Builds dictionary with all elements for resuming training - checkpoint = { - "epoch": total_epochs, - "model_state_dict": model.state_dict(), - "optimizer_state_dict": optimizer.state_dict(), - "loss": train_losses, - "val_loss": val_losses, - } - - torch.save(checkpoint, filename) - - -def load_checkpoint(model, optimizer, filename): - # Loads dictionary - checkpoint = torch.load(filename) - - # Restore state for model and optimizer - model.load_state_dict(checkpoint["model_state_dict"]) - optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) - - total_epochs = checkpoint["epoch"] - losses = checkpoint["loss"] - val_losses = checkpoint["val_loss"] - return model - - -def main(arg): - logger.info(f"Starting training on {arg.model}") - - DEVICE = get_device_strategy(tpu=arg.tpu) - logger.info(f"Training on {DEVICE} for {arg.epochs} epochs.") - - model = ModelLoader().get_model(arg.model) - - optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9) - criterion = nn.CrossEntropyLoss(label_smoothing=0.1) - model = model.to(DEVICE) - - # Optimizes given model/function using TorchDynamo and specified backend - torch.compile(model) - - logger.info("training") - wandb.init( - project="ASL-project", - config={ - "learning_rate": 0.01, - "architecture": "Test Model", - "dataset": "Google ASL Landmarks", - "epochs": 12, - }, - ) - - wandb.watch(model) - try: - train( - model=arg.model, - optim=optimizer, - loss_func=criterion, - n_epochs=arg.epochs, - batch=arg.batch, - device=DEVICE, - ) - logger.success(f"Training completed: {arg.epochs} epochs on {DEVICE}.") - - except Exception as error: - logger.exception(f"Training failed due to an {error}.") - - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/linguify_yb/src/utils/__init__.py b/linguify_yb/src/utils/__init__.py deleted file mode 100644 index 8da12c8d..00000000 --- a/linguify_yb/src/utils/__init__.py +++ /dev/null @@ -1,74 +0,0 @@ -import os -import random -import argparse - -import numpy as np -import torch - -import torch_xla.core.xla_model as xm - - -def set_seed(seed: int = 42) -> None: - 
np.random.seed(seed) - random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed(seed) - # When running on the CuDNN backend, two further options must be set - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - # Set a fixed value for the hash seed - os.environ["PYTHONHASHSEED"] = str(seed) - - -def get_device_strategy(tpu=False): - if tpu: - device = xm.xla_device() - else: - device = torch.device("cuda" if torch.cuda.is_availabe() else "cpu") - return device - - -def parse_args(): - """ - Parse arguments given to the script. - - Returns: - The parsed argument object. - """ - parser = argparse.ArgumentParser( - description="Run distributed data-parallel training and log with wandb." - ) - - parser.add_argument( - "--model", - default="asl_transfomer", - type=str, - metavar="N", - help="name of model to train", - ) - - parser.add_argument( - "--epochs", - default=2, - type=int, - metavar="N", - help="number of total epochs to run", - ) - parser.add_argument( - "--batch", - default=32, - type=int, - metavar="N", - help="number of data samples in one batch", - ) - parser.add_argument( - "--tpu", - default=False, - type=bool, - metavar="N", - help="Train on TPU Device", - ) - parser.add_argument('--resume_checkpoint', type=bool, help='Path to the checkpoint for resuming training') - - args = parser.parse_args() - return args diff --git a/linguify_yb/src/utils/benchmark,py b/linguify_yb/src/utils/benchmark,py deleted file mode 100644 index e9e4e5b9..00000000 --- a/linguify_yb/src/utils/benchmark,py +++ /dev/null @@ -1,50 +0,0 @@ -"""doc -""" - -import profile_macs -import torch.nn as nn - - -Byte = 8 -KiB = 1024 * Byte -MiB = 1024 * KiB -GiB = 1024 * MiB - - -def get_model_macs(model, inputs) -> int: - return profile_macs(model, inputs) - - -def get_model_sparsity(model: nn.Module) -> float: - """ - calculate the sparsity of the given model - sparsity = #zeros / #elements = 1 - #nonzeros / #elements - """ - num_nonzeros, num_elements = 0, 0 - for param in model.parameters(): - num_nonzeros += param.count_nonzero() - num_elements += param.numel() - return 1 - float(num_nonzeros) / num_elements - - -def get_num_parameters(model: nn.Module, count_nonzero_only=False) -> int: - """ - calculate the total number of parameters of model - :param count_nonzero_only: only count nonzero weights - """ - num_counted_elements = 0 - for param in model.parameters(): - if count_nonzero_only: - num_counted_elements += param.count_nonzero() - else: - num_counted_elements += param.numel() - return num_counted_elements - - -def get_model_size(model: nn.Module, data_width=32, count_nonzero_only=False) -> int: - """ - calculate the model size in bits - :param data_width: #bits per element - :param count_nonzero_only: only count nonzero weights - """ - return get_num_parameters(model, count_nonzero_only) * data_width diff --git a/linguify_yb/tests/test_data_ingestion.py b/linguify_yb/tests/test_data_ingestion.py deleted file mode 100644 index e69de29b..00000000 diff --git a/linguify_yb/tests/test_model.py b/linguify_yb/tests/test_model.py deleted file mode 100644 index e69de29b..00000000 diff --git a/linguify_yb/tests/test_pipeline.py b/linguify_yb/tests/test_pipeline.py deleted file mode 100644 index e69de29b..00000000 From b6e5e95486ae9c70adc805f100af7b64102108e6 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 02:46:12 +0100 Subject: [PATCH 06/16] [add] added units test --- linguify_yb/src/tests/test_data_ingestion.py | 8 ++++ 
linguify_yb/src/tests/test_model.py | 39 ++++++++++++++++++++ linguify_yb/src/tests/test_pipeline.py | 0 3 files changed, 47 insertions(+) create mode 100644 linguify_yb/src/tests/test_data_ingestion.py create mode 100644 linguify_yb/src/tests/test_model.py create mode 100644 linguify_yb/src/tests/test_pipeline.py diff --git a/linguify_yb/src/tests/test_data_ingestion.py b/linguify_yb/src/tests/test_data_ingestion.py new file mode 100644 index 00000000..b4e86067 --- /dev/null +++ b/linguify_yb/src/tests/test_data_ingestion.py @@ -0,0 +1,8 @@ +"doc" + +import torch +import pytest + + +def test_data_columns(): + pass diff --git a/linguify_yb/src/tests/test_model.py b/linguify_yb/src/tests/test_model.py new file mode 100644 index 00000000..c1206d1c --- /dev/null +++ b/linguify_yb/src/tests/test_model.py @@ -0,0 +1,39 @@ +"""doc +""" + +import pytest + +import torch +from torch.utils.data import DataLoader +from src.models.baseline_transformer import ASLTransformer + +@pytest.fixture +def baseline_model(): + """_summary_""" + model = ASLTransformer() + return model + + +@pytest.mark.parametrize( + "inputs_x, target_y", [(torch.randn(128, 345), torch.randint(0, 60, (64,)))] +) +def test_baseline_transformer_output_shape(baseline_model, inputs_x, target_y): + """_summary_""" + output = baseline_model(inputs_x, target_y) + # Assert + expected_output_shape = (64, 62) + assert output.shape == expected_output_shape + +@pytest.mark.parametrize("inputs_x", [(torch.randn(128,345)), (torch.randn(128,345))]) +def test_baseline_transformer_generate_out( + baseline_model, + inputs_x, +): + """_summary_""" + output = baseline_model.generate(inputs_x) + # Assert + expected_output_len = 64 + assert len(output) == expected_output_len + +@pytest.mark.parametrize("input_shape", [(batch_size, input_dim) for batch_size in [1, 2, 5] for input_dim in [32, 64, 128]]) +def \ No newline at end of file diff --git a/linguify_yb/src/tests/test_pipeline.py b/linguify_yb/src/tests/test_pipeline.py new file mode 100644 index 00000000..e69de29b From bd8a07549da2c67219511f955d3616d9f5526eaa Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 02:46:49 +0100 Subject: [PATCH 07/16] [add] documentaiion for baseline transformer --- .../src/models/baseline_transformer.py | 236 ++++++++++++------ 1 file changed, 161 insertions(+), 75 deletions(-) diff --git a/linguify_yb/src/models/baseline_transformer.py b/linguify_yb/src/models/baseline_transformer.py index 02d76856..2ab43725 100644 --- a/linguify_yb/src/models/baseline_transformer.py +++ b/linguify_yb/src/models/baseline_transformer.py @@ -1,26 +1,65 @@ -"""doc """ +Baseline Transformer Module +This module contains the implementation of a Transformer model for sign language tasks. + +Classes: +- TokenEmbedding: Create embedding for the target seqeunce +- LandmarkEmbedding: Create embedding for the source(frames)seqeunce +- Encoder: Implements the transformer encoder stack. +- Decoder: Implements the transformer decoder stack. +- Transformer: The main transformer model class with methods for training and inference. 
+ +Methods: +- Transformer.generate: Perform inference on a new sequence +""" import torch from torch import nn class TokenEmbedding(nn.Module): - def __init__(self, num_vocab=1000, maxlen=100, num_hid=200): + """_summary_""" + + def __init__(self, num_vocab, maxlen, embedding_dim): + """_summary_ + + Parameters + ---------- + num_vocab : _type_ + _description_ + maxlen : _type_ + _description_ + embedding_dim : _type_ + _description_ + """ super().__init__() - self.emb = nn.Embedding(num_vocab, num_hid) - self.pos_emb = nn.Embedding(maxlen, num_hid) + self.token_embed_layer = nn.Embedding(num_vocab, embedding_dim) + self.postion_embed_layer = nn.Embedding(maxlen, embedding_dim) def forward(self, x): + """_summary_ + + Parameters + ---------- + x : _type_ + _description_ + + Returns + ------- + _type_ + _description_ + """ maxlen = x.size(-1) - x = self.emb(x) + x = self.token_embed_layer(x) positions = torch.arange(0, maxlen).to(x.device) - positions = self.pos_emb(positions) + positions = self.postion_embed_layer(positions) return x + positions class LandmarkEmbedding(nn.Module): - def __init__(self, num_hid=64, maxlen=100): + """_summary_""" + + def __init__(self, embedding_dim): super().__init__() # Calculate the padding for "same" padding padding = (11 - 1) // 2 @@ -37,7 +76,7 @@ def __init__(self, num_hid=64, maxlen=100): ) # Output embedding layer - self.embedding_layer = nn.Linear(256, num_hid) + self.embedding_layer = nn.Linear(256, embedding_dim) def forward(self, x): # Input x should have shape (batch_size, input_size) @@ -58,141 +97,189 @@ def forward(self, x): class TransformerEncoder(nn.Module): + """_summary_""" + def __init__( self, - embed_dim, + embedding_dim, num_heads, feed_forward_dim, rate=0.1, ): + """_summary_ + + Parameters + ---------- + embedding_dim : _type_ + _description_ + num_heads : _type_ + _description_ + feed_forward_dim : _type_ + _description_ + rate : float, optional + _description_, by default 0.1 + """ super().__init__() - self.att = nn.MultiheadAttention(embed_dim, num_heads) + self.multi_attention = nn.MultiheadAttention(embedding_dim, num_heads) self.ffn = nn.Sequential( - nn.Linear(embed_dim, feed_forward_dim), + nn.Linear(embedding_dim, feed_forward_dim), nn.ReLU(), - nn.Linear(feed_forward_dim, embed_dim), + nn.Linear(feed_forward_dim, embedding_dim), ) - self.layernorm1 = nn.LayerNorm(embed_dim, eps=1e-6) - self.layernorm2 = nn.LayerNorm(embed_dim, eps=1e-6) + self.layernorm1 = nn.LayerNorm(embedding_dim, eps=1e-6) + self.layernorm2 = nn.LayerNorm(embedding_dim, eps=1e-6) self.dropout1 = nn.Dropout(rate) self.dropout2 = nn.Dropout(rate) - def forward(self, inputs): - attn_out, _ = self.att(inputs, inputs, inputs) - attn_out = self.dropout1(attn_out) - out1 = self.layernorm1(inputs + attn_out) + def forward(self, inputs_x): + multi_attention_out, _ = self.multi_attention(inputs_x, inputs_x, inputs_x) + multi_attention_out = self.dropout1(multi_attention_out) + out1 = self.layernorm1(inputs_x + multi_attention_out) ffn_out = self.ffn(out1) ffn_out = self.dropout2(ffn_out) x = self.layernorm2(out1 + ffn_out) - print(f"endocder{x.shape}") return x class TransformerDecoder(nn.Module): - def __init__(self, embed_dim, num_heads, feed_forward_dim, dropout_rate=0.1): + """_summary_""" + + def __init__(self, embedding_dim, num_heads, feed_forward_dim, dropout_rate=0.1): super().__init__() - self.layernorm1 = nn.LayerNorm(embed_dim, eps=1e-6) - self.layernorm2 = nn.LayerNorm(embed_dim, eps=1e-6) - self.layernorm3 = nn.LayerNorm(embed_dim, eps=1e-6) - 
self.self_att = nn.MultiheadAttention(embed_dim, num_heads) - self.enc_att = nn.MultiheadAttention(embed_dim, num_heads) - self.self_dropout = nn.Dropout(0.5) - self.enc_dropout = nn.Dropout(0.1) - self.ffn_dropout = nn.Dropout(0.1) + self.num_heads_ = num_heads + self.layernorm1 = nn.LayerNorm(embedding_dim, eps=1e-6) + self.layernorm2 = nn.LayerNorm(embedding_dim, eps=1e-6) + self.layernorm3 = nn.LayerNorm(embedding_dim, eps=1e-6) + self.decoder_multi_attention = nn.MultiheadAttention(embedding_dim, num_heads) + self.encoder_multi_attention = nn.MultiheadAttention(embedding_dim, num_heads) + self.decoder_dropout = nn.Dropout(0.5) + self.encoder_dropout = nn.Dropout(dropout_rate) + self.ffn_dropout = nn.Dropout(dropout_rate) self.ffn = nn.Sequential( - nn.Linear(embed_dim, feed_forward_dim), + nn.Linear(embedding_dim, feed_forward_dim), nn.ReLU(), - nn.Linear(feed_forward_dim, embed_dim), + nn.Linear(feed_forward_dim, embedding_dim), ) - def causal_attention_mask( - self, sequence_length, batch_size=1, num_heads=4, device="cpu" - ): + def _causal_attention_mask(self, sequence_length, batch_size=1, device=None): mask = torch.triu(torch.ones(sequence_length, sequence_length), diagonal=1).to( device ) mask = mask.unsqueeze(0).expand( - batch_size * num_heads, sequence_length, sequence_length + batch_size * self.num_heads_, sequence_length, sequence_length ) return mask def forward( self, - enc_out, + encoder_out, src_target_, ): input_shape = src_target_.size() batch_size = 1 # input_shape[0] seq_len = input_shape[0] - mask = self.causal_attention_mask(seq_len, batch_size=batch_size) - target_att, _ = self.self_att( - src_target_, src_target_, src_target_, attn_mask=mask + x_device = src_target_.device + + # Mask + causal_mask = self._causal_attention_mask( + sequence_length=seq_len, batch_size=batch_size, device=x_device ) - target_norm = self.layernorm1(src_target_ + self.self_dropout(target_att)) - enc_out, _ = self.enc_att(target_norm, enc_out, enc_out) - enc_out_norm = self.layernorm2(enc_out + self.enc_dropout(enc_out)) + target_att, _ = self.decoder_multi_attention( + src_target_, src_target_, src_target_, attn_mask=causal_mask + ) + target_norm_out = self.layernorm1( + src_target_ + self.decoder_dropout(target_att) + ) + + encoder_out, _ = self.encoder_multi_attention( + target_norm_out, encoder_out, encoder_out + ) + enc_out_norm = self.layernorm2(encoder_out + self.encoder_dropout(encoder_out)) ffn_out = self.ffn(enc_out_norm) ffn_out_norm = self.layernorm3(enc_out_norm + self.ffn_dropout(ffn_out)) - print(f"decoder - {ffn_out_norm.shape}") return ffn_out_norm -class NTransformer(nn.Module): +class ASLTransformer(nn.Module): def __init__( self, - num_hid=64, - num_head=8, + num_hidden_dim=64, + multi_num_head=8, num_feed_forward=128, - source_maxlen=100, - target_maxlen=100, + target_maxlen=64, num_layers_enc=4, num_layers_dec=4, ): + """_summary_ + + Parameters + ---------- + num_hidden_dim : int, optional + _description_, by default 64 + multi_num_head : int, optional + _description_, by default 8 + num_feed_forward : int, optional + _description_, by default 128 + target_maxlen : int, optional + _description_, by default 64 + num_layers_enc : int, optional + _description_, by default 4 + num_layers_dec : int, optional + _description_, by default 4 + """ super().__init__() self.num_layers_enc = num_layers_enc self.num_layers_dec = num_layers_dec self.target_maxlen = target_maxlen self.num_classes = 62 - self.enc_input = LandmarkEmbedding(num_hid=num_hid, maxlen=source_maxlen) - 
self.dec_input = TokenEmbedding( - num_vocab=self.num_classes, maxlen=target_maxlen + self.encoder_input = LandmarkEmbedding(embedding_dim=num_hidden_dim) + self.decoder_input = TokenEmbedding( + num_vocab=self.num_classes, + embedding_dim=num_hidden_dim, + maxlen=target_maxlen, ) self.encoder = nn.Sequential( - self.enc_input, + self.encoder_input, *[ - TransformerEncoder(num_hid, num_head, num_feed_forward) + TransformerEncoder( + embedding_dim=num_hidden_dim, + num_heads=multi_num_head, + feed_forward_dim=num_feed_forward, + ) for _ in range(num_layers_enc) ], ) for i in range(num_layers_dec): self.add_module( - f"dec_layer_{i}", - TransformerDecoder(num_hid, num_head, num_feed_forward), + f"decoder_layer_{i}", + TransformerDecoder( + embedding_dim=num_hidden_dim, + num_heads=multi_num_head, + feed_forward_dim=num_feed_forward, + ), ) - self.classifier = nn.Linear(num_hid, self.num_classes) + self.classifier = nn.Linear( + in_features=num_hidden_dim, out_features=self.num_classes + ) def forward(self, source, target): - x = self.encoder(source) - y = self.decoder_run(x, target) - print(y.shape) - return self.classifier(y) - - def decoder_run(self, enc_out, target): - print(f"before emb {target.shape}") - y = self.dec_input(target) - print(f"after emb {y.shape}") + encoder_out = self.encoder(source) + transformer_output = self._decoder_run(encoder_out, target) + return self.classifier(transformer_output) + def _decoder_run(self, enc_out, target): + decoder_out = self.decoder_input(target) for i in range(self.num_layers_dec): - y = getattr(self, f"dec_layer_{i}")(enc_out, y) - return y + decoder_out = getattr(self, f"decoder_layer_{i}")(enc_out, decoder_out) + return decoder_out def generate(self, source, target_start_token_idx=60): """Performs inference over one batch of inputs using greedy decoding @@ -201,7 +288,7 @@ def generate(self, source, target_start_token_idx=60): ---------- source : _type_ _description_ - target_start_token_idx : _type_ + target_start_token_idx : int _description_ Returns @@ -209,18 +296,17 @@ def generate(self, source, target_start_token_idx=60): _type_ _description_ """ - bs = source.size(0) - enc = self.encoder(source) - dec_input = ( + encoder_out = self.encoder(source) + decoder_input = ( torch.ones((1), dtype=torch.long).to(source.device) * target_start_token_idx ) dec_logits = [] - for i in range(self.target_maxlen - 1): - dec_out = self.decoder_run(enc, dec_input) - logits = self.classifier(dec_out) + for _ in range(self.target_maxlen - 1): + decoder_out = self._decoder_run(encoder_out, decoder_input) + logits = self.classifier(decoder_out) logits = torch.argmax(logits, dim=-1, keepdim=True) last_logit = logits[-1] dec_logits.append(last_logit) - dec_input = torch.cat([dec_input, last_logit], dim=-1) - return dec_input + decoder_input = torch.cat([decoder_input, last_logit], dim=-1) + return decoder_input From 9a9915b77b80a33feb995fac27e23cfccfe321c9 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 02:47:20 +0100 Subject: [PATCH 08/16] [add] added model benchmark script --- linguify_yb/src/benchmark.py | 58 ++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 linguify_yb/src/benchmark.py diff --git a/linguify_yb/src/benchmark.py b/linguify_yb/src/benchmark.py new file mode 100644 index 00000000..4c710533 --- /dev/null +++ b/linguify_yb/src/benchmark.py @@ -0,0 +1,58 @@ +"""doc +""" +from torchprofile import profile_macs +from torch import nn + + +Byte = 8 +KiB = 1024 * Byte +MiB = 1024 * KiB +GiB = 
1024 * MiB + + +class BenchMarker: + def __init__(self) -> None: + pass + + def get_model_macs(self, model, inputs=None) -> int: + return profile_macs(model, inputs) + + def get_model_sparsity(self, model: nn.Module) -> float: + """ + calculate the sparsity of the given model + sparsity = #zeros / #elements = 1 - #nonzeros / #elements + """ + num_nonzeros, num_elements = 0, 0 + for param in model.parameters(): + num_nonzeros += param.count_nonzero() + num_elements += param.numel() + return 1 - float(num_nonzeros) / num_elements + + def get_num_parameters(self, model: nn.Module, count_nonzero_only=False) -> int: + """ + calculate the total number of parameters of model + :param count_nonzero_only: only count nonzero weights + """ + num_counted_elements = 0 + for param in model.parameters(): + if count_nonzero_only: + num_counted_elements += param.count_nonzero() + else: + num_counted_elements += param.numel() + return num_counted_elements + + def get_model_size( + self, model: nn.Module, data_width=32, count_nonzero_only=False + ) -> int: + """ + calculate the model size in bits + :param data_width: #bits per element + :param count_nonzero_only: only count nonzero weights + """ + return self.get_num_parameters(model, count_nonzero_only) * data_width + + def runner(self, model): + model_macs = self.get_model_macs(model) + model_sparsity = self.get_model_sparsity(model) + model_num_params = self.get_num_parameters(model) + model_size = self.get_model_size(model) From 321987f4395d45880375721a541015d064374709 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 02:49:21 +0100 Subject: [PATCH 09/16] [add] updates readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 12455609..320d59a3 100644 --- a/README.md +++ b/README.md @@ -56,9 +56,9 @@ $ source $(poetry env info --path)/bin/activate Here's a glimpse of the exciting features we plan to implement in the coming weeks: -- [x] Add project's documentation (you are reading it now), create issues and milestones, setup document's stub, suggest - page layout and styling. 
+- [x] Add project's documentation - [] Develop a Proof of Concept System +- [] Deployment of Proof of Concept System ## Acknowledgments From 85d3d02ba54fb85fa5e754ef04493531b78c886d Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 03:28:08 +0100 Subject: [PATCH 10/16] [add] test workflow --- .github/workflows/run_units_test.yml | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/run_units_test.yml diff --git a/.github/workflows/run_units_test.yml b/.github/workflows/run_units_test.yml new file mode 100644 index 00000000..43fa0cb2 --- /dev/null +++ b/.github/workflows/run_units_test.yml @@ -0,0 +1,29 @@ +name: Units Tests + +on: + push: + branches: + - main + +jobs: + units-test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Use Node.js 16 + uses: actions/setup-node@v3 + with: + node-version: 16 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.10 + + - name: Pytest + run: | + cd linguify + pytest \ No newline at end of file From b337d33195bcf07ff0c99bce7ad77640774d8739 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 06:08:27 +0100 Subject: [PATCH 11/16] [add] mics --- linguify_yb/development/code_dev.ipynb | 161 ++++++ linguify_yb/development/data_dev.ipynb | 39 -- linguify_yb/development/dev.ipynb | 112 +--- linguify_yb/development/trans_dev.ipynb | 697 ++++++++++++++++++++++++ 4 files changed, 876 insertions(+), 133 deletions(-) create mode 100644 linguify_yb/development/code_dev.ipynb create mode 100644 linguify_yb/development/trans_dev.ipynb diff --git a/linguify_yb/development/code_dev.ipynb b/linguify_yb/development/code_dev.ipynb new file mode 100644 index 00000000..a41f02c2 --- /dev/null +++ b/linguify_yb/development/code_dev.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def resize_pad(x):\n", + " if x.shape[0] < FRAME_LEN:\n", + " x = F.pad(x, (0, 0, 0, FRAME_LEN - x.shape[0], 0, 0))\n", + " else:\n", + " x = x.unsqueeze(0) # Add batch and channel dimensions\n", + " x = torch.nn.functional.interpolate(\n", + " x, size=(FRAME_LEN, x.shape[1]), mode=\"bilinear\", align_corners=False\n", + " ).squeeze(0)\n", + "\n", + " return x\n", + "\n", + "\n", + "def frames_preprocess(x):\n", + " x = torch.tensor(x)\n", + " rhand = x[:, RHAND_IDX]\n", + " lhand = x[:, LHAND_IDX]\n", + " rpose = x[:, RPOSE_IDX]\n", + " lpose = x[:, LPOSE_IDX]\n", + "\n", + " rnan_idx = torch.any(torch.isnan(rhand), dim=1)\n", + " lnan_idx = torch.any(torch.isnan(lhand), dim=1)\n", + "\n", + " rnans = torch.sum(rnan_idx)\n", + " lnans = torch.sum(lnan_idx)\n", + "\n", + " if rnans > lnans:\n", + " hand = lhand\n", + " pose = lpose\n", + "\n", + " hand_x = hand[:, 0 * (len(LHAND_IDX) // 3) : 1 * (len(LHAND_IDX) // 3)]\n", + " hand_y = hand[:, 1 * (len(LHAND_IDX) // 3) : 2 * (len(LHAND_IDX) // 3)]\n", + " hand_z = hand[:, 2 * (len(LHAND_IDX) // 3) : 3 * (len(LHAND_IDX) // 3)]\n", + " hand = torch.cat([1 - hand_x, hand_y, hand_z], dim=1)\n", + "\n", + " pose_x = pose[:, 0 * (len(LPOSE_IDX) // 3) : 1 * (len(LPOSE_IDX) // 3)]\n", + " pose_y = pose[:, 1 * (len(LPOSE_IDX) // 3) : 2 * (len(LPOSE_IDX) // 3)]\n", + " pose_z = pose[:, 2 * (len(LPOSE_IDX) // 3) : 3 * (len(LPOSE_IDX) // 3)]\n", + " pose = torch.cat([1 - pose_x, pose_y, pose_z], dim=1)\n", + " else:\n", + " hand = rhand\n", + " pose = rpose\n", + "\n", + " hand_x = hand[:, 0 * 
(len(LHAND_IDX) // 3) : 1 * (len(LHAND_IDX) // 3)]\n", + " hand_y = hand[:, 1 * (len(LHAND_IDX) // 3) : 2 * (len(LHAND_IDX) // 3)]\n", + " hand_z = hand[:, 2 * (len(LHAND_IDX) // 3) : 3 * (len(LHAND_IDX) // 3)]\n", + " hand = torch.cat(\n", + " [hand_x.unsqueeze(-1), hand_y.unsqueeze(-1), hand_z.unsqueeze(-1)], dim=-1\n", + " )\n", + "\n", + " mean = torch.mean(hand, dim=1).unsqueeze(1)\n", + " std = torch.std(hand, dim=1).unsqueeze(1)\n", + " hand = (hand - mean) / std\n", + "\n", + " pose_x = pose[:, 0 * (len(LPOSE_IDX) // 3) : 1 * (len(LPOSE_IDX) // 3)]\n", + " pose_y = pose[:, 1 * (len(LPOSE_IDX) // 3) : 2 * (len(LPOSE_IDX) // 3)]\n", + " pose_z = pose[:, 2 * (len(LPOSE_IDX) // 3) : 3 * (len(LPOSE_IDX) // 3)]\n", + " pose = torch.cat(\n", + " [pose_x.unsqueeze(-1), pose_y.unsqueeze(-1), pose_z.unsqueeze(-1)], dim=-1\n", + " )\n", + "\n", + " x = torch.cat([hand, pose], dim=1)\n", + " # print(f\"befor re{x.shape}\")\n", + " x = resize_pad(x)\n", + " # print(f\"after re{x.shape}\")\n", + " x = torch.where(torch.isnan(x), torch.zeros_like(x), x)\n", + " # print(x.shape)\n", + "\n", + " #! CRITICAL Debug\n", + " # x = x.view(FRAME_LEN, len(LHAND_IDX) + len(LPOSE_IDX))\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"doc\n", + "\"\"\"\n", + "\n", + "FRAME_LEN = 128\n", + "\n", + "LPOSE = [13, 15, 17, 19, 21]\n", + "RPOSE = [14, 16, 18, 20, 22]\n", + "POSE = LPOSE + RPOSE\n", + "\n", + "X = (\n", + " [f\"x_right_hand_{i}\" for i in range(21)]\n", + " + [f\"x_left_hand_{i}\" for i in range(21)]\n", + " + [f\"x_pose_{i}\" for i in POSE]\n", + ")\n", + "Y = (\n", + " [f\"y_right_hand_{i}\" for i in range(21)]\n", + " + [f\"y_left_hand_{i}\" for i in range(21)]\n", + " + [f\"y_pose_{i}\" for i in POSE]\n", + ")\n", + "Z = (\n", + " [f\"z_right_hand_{i}\" for i in range(21)]\n", + " + [f\"z_left_hand_{i}\" for i in range(21)]\n", + " + [f\"z_pose_{i}\" for i in POSE]\n", + ")\n", + "\n", + "FEATURE_COLUMNS = X + Y + Z\n", + "\n", + "X_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if \"x_\" in col]\n", + "Y_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if \"y_\" in col]\n", + "Z_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if \"z_\" in col]\n", + "\n", + "RHAND_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if \"right\" in col]\n", + "LHAND_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if \"left\" in col]\n", + "RPOSE_IDX = [\n", + " i\n", + " for i, col in enumerate(FEATURE_COLUMNS)\n", + " if \"pose\" in col and int(col[-2:]) in RPOSE\n", + "]\n", + "LPOSE_IDX = [\n", + " i\n", + " for i, col in enumerate(FEATURE_COLUMNS)\n", + " if \"pose\" in col and int(col[-2:]) in LPOSE\n", + "]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/linguify_yb/development/data_dev.ipynb b/linguify_yb/development/data_dev.ipynb index 2fd7178f..cade00e3 100644 --- a/linguify_yb/development/data_dev.ipynb +++ b/linguify_yb/development/data_dev.ipynb @@ -13,45 +13,6 @@ "import pyarrow.parquet as pq" ] }, - { - 
"cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total Space: 1399.98 GB\n", - "Used Space: 455.25 GB\n", - "Free Space: 944.73 GB\n", - "Percentage Used: 32.50%\n" - ] - } - ], - "source": [ - "import psutil\n", - "\n", - "def get_storage_space():\n", - " # Get disk usage statistics\n", - " disk_usage = psutil.disk_usage('/')\n", - "\n", - " # Extract relevant information\n", - " total_space = disk_usage.total # Total storage space\n", - " used_space = disk_usage.used # Used storage space\n", - " free_space = disk_usage.free # Free storage space\n", - " percent_used = disk_usage.percent # Percentage of used space\n", - "\n", - " # Print or return the information\n", - " print(f\"Total Space: {total_space / (1024 ** 3):.2f} GB\")\n", - " print(f\"Used Space: {used_space / (1024 ** 3):.2f} GB\")\n", - " print(f\"Free Space: {free_space / (1024 ** 3):.2f} GB\")\n", - " print(f\"Percentage Used: {percent_used:.2f}%\")\n", - "\n", - "# Call the function to get storage space information\n", - "get_storage_space()\n" - ] - }, { "cell_type": "code", "execution_count": 16, diff --git a/linguify_yb/development/dev.ipynb b/linguify_yb/development/dev.ipynb index 273dbbb5..9224fe47 100644 --- a/linguify_yb/development/dev.ipynb +++ b/linguify_yb/development/dev.ipynb @@ -4,25 +4,6 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Note: you may need to restart the kernel to use updated packages.\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "%pip install torch --quiet\n", - "%pip install torchprofile --quiet" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, "outputs": [ { "name": "stdout", @@ -36,8 +17,9 @@ "import random\n", "import os\n", "import torch\n", + "import torch.nn as nn\n", "import numpy as np\n", - "\n", + "from torchprofile import profile_macs\n", "\n", "def set_seed(seed: int = 42) -> None:\n", " np.random.seed(seed)\n", @@ -56,85 +38,34 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "from torch import nn\n", - "import os\n", - "#from torchprofile import profile_macs" - ] - }, - { - "cell_type": "code", - "execution_count": 21, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "logits = torch.rand(64,64)" + "ans = [(torch.randn(batch_size, 128, 345), torch.randint(0, 60, (batch_size, 64)), batch_size) \n", + " for batch_size in [1,2,4,8]]" ] }, { "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "dec_logits = []\n", - "dec_input = (torch.ones((1), dtype=torch.long)* 60)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "logits = torch.argmax(logits, dim=-1, keepdim=True) \n", - "last_logit = logits[:, -1]\n", - "dec_logits.append(last_logit)\n", - "dec_input = torch.cat([dec_input, last_logit], dim=-1)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, + "execution_count": 9, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "torch.Size([64])" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "last_logit.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - 
"text/plain": [ - "torch.Size([65])" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1, 128, 345]) torch.Size([1, 64]) 1\n", + "torch.Size([2, 128, 345]) torch.Size([2, 64]) 2\n", + "torch.Size([4, 128, 345]) torch.Size([4, 64]) 4\n", + "torch.Size([8, 128, 345]) torch.Size([8, 64]) 8\n" + ] } ], "source": [ - "dec_input.shape" + "for b in ans:\n", + " x, y , bs = b\n", + " print(x.shape, y.shape,bs)" ] }, { @@ -142,14 +73,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "for idx in preds[i, :]:\n", - " prediction += self.idx_to_char[idx]\n", - " if idx == 60:\n", - " break\n", - "print(f\"target: {target_text.replace('-','')}\")\n", - "print(f\"prediction: {prediction}\\n\")" - ] + "source": [] } ], "metadata": { diff --git a/linguify_yb/development/trans_dev.ipynb b/linguify_yb/development/trans_dev.ipynb new file mode 100644 index 00000000..e2c673c6 --- /dev/null +++ b/linguify_yb/development/trans_dev.ipynb @@ -0,0 +1,697 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random seed set as 42\n" + ] + } + ], + "source": [ + "import random\n", + "import os\n", + "import torch\n", + "import torch.nn as nn\n", + "import numpy as np\n", + "from loguru import logger\n", + "from torchprofile import profile_macs\n", + "\n", + "def set_seed(seed: int = 42) -> None:\n", + " np.random.seed(seed)\n", + " random.seed(seed)\n", + " torch.manual_seed(seed)\n", + " torch.cuda.manual_seed(seed)\n", + " # When running on the CuDNN backend, two further options must be set\n", + " torch.backends.cudnn.deterministic = True\n", + " torch.backends.cudnn.benchmark = False\n", + " # Set a fixed value for the hash seed\n", + " os.environ[\"PYTHONHASHSEED\"] = str(seed)\n", + " print(f\"Random seed set as {seed}\")\n", + "\n", + "set_seed()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Baseline Transformer Module\n", + "\n", + "This module contains the implementation of a Transformer model for sign language tasks.\n", + "\n", + "Classes:\n", + "- TokenEmbedding: Create embedding for the target seqeunce\n", + "- LandmarkEmbedding: Create embedding for the source(frames)seqeunce\n", + "- Encoder: Implements the transformer encoder stack.\n", + "- Decoder: Implements the transformer decoder stack.\n", + "- Transformer: The main transformer model class with methods for training and inference.\n", + "\n", + "Methods:\n", + "- Transformer.generate: Perform inference on a new sequence\n", + "\"\"\"\n", + "import torch\n", + "from torch import nn\n", + "\n", + "\n", + "class TokenEmbedding(nn.Module):\n", + " \"\"\"Embed the tokens with postion encoding\"\"\"\n", + "\n", + " def __init__(self, num_vocab, maxlen, embedding_dim):\n", + " \"\"\"_summary_\n", + "\n", + " Parameters\n", + " ----------\n", + " num_vocab : int\n", + " number of vocabulary\n", + " maxlen : int\n", + " maximuin length of sequence\n", + " embedding_dim : int\n", + " embedding output dimension\n", + " \"\"\"\n", + " super().__init__()\n", + " self.token_embed_layer = nn.Embedding(num_vocab, embedding_dim)\n", + " self.postion_embed_layer = nn.Embedding(maxlen, embedding_dim)\n", + "\n", + " def forward(self, x):\n", + " \"\"\"_summary_\n", + "\n", + " Parameters\n", + " ----------\n", + " x : tensors\n", + " 
_description_\n", + "\n", + " Returns\n", + " -------\n", + " tensors\n", + " _description_\n", + " \"\"\"\n", + " maxlen = x.size(-1)\n", + " x = self.token_embed_layer(x)\n", + " positions = torch.arange(0, maxlen).to(x.device)\n", + " positions = self.postion_embed_layer(positions)\n", + " return x + positions\n", + "\n", + "\n", + "class LandmarkEmbedding(nn.Module):\n", + " \"\"\"_summary_\"\"\"\n", + "\n", + " def __init__(self, embedding_dim):\n", + " super().__init__()\n", + " # Calculate the padding for \"same\" padding\n", + " padding = (11 - 1) // 2\n", + "\n", + " # Define three 1D convolutional layers with ReLU activation and stride 2\n", + " self.conv1 = nn.Conv1d(\n", + " in_channels=1, out_channels=64, kernel_size=11, stride=2, padding=padding\n", + " )\n", + " self.conv2 = nn.Conv1d(\n", + " in_channels=64, out_channels=128, kernel_size=11, stride=2, padding=padding\n", + " )\n", + " self.conv3 = nn.Conv1d(\n", + " in_channels=128, out_channels=256, kernel_size=11, stride=2, padding=padding\n", + " )\n", + "\n", + " # Output embedding layer\n", + " self.embedding_layer = nn.Linear(256, embedding_dim)\n", + "\n", + " def forward(self, x):\n", + " # Input x should have shape (batch_size, input_size, input_dim)\n", + " x = x.unsqueeze(1) # Add a channel dimension for 1D convolution\n", + "\n", + " # Apply convolutional layers with ReLU activation and stride 2\n", + " x = torch.relu(self.conv1(x))\n", + " x = torch.relu(self.conv2(x))\n", + " x = torch.relu(self.conv3(x))\n", + "\n", + " # Global average pooling to reduce spatial dimensions\n", + " x = torch.mean(x, dim=2)\n", + "\n", + " # Apply the linear embedding layer\n", + " x = self.embedding_layer(x)\n", + "\n", + " return x\n", + "\n", + "\n", + "class TransformerEncoder(nn.Module):\n", + " \"\"\"_summary_\"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " embedding_dim,\n", + " num_heads,\n", + " feed_forward_dim,\n", + " rate=0.1,\n", + " ):\n", + " \"\"\"_summary_\n", + "\n", + " Parameters\n", + " ----------\n", + " embedding_dim : _type_\n", + " _description_\n", + " num_heads : _type_\n", + " _description_\n", + " feed_forward_dim : _type_\n", + " _description_\n", + " rate : float, optional\n", + " _description_, by default 0.1\n", + " \"\"\"\n", + " super().__init__()\n", + " self.multi_attention = nn.MultiheadAttention(embedding_dim, num_heads)\n", + " self.ffn = nn.Sequential(\n", + " nn.Linear(embedding_dim, feed_forward_dim),\n", + " nn.ReLU(),\n", + " nn.Linear(feed_forward_dim, embedding_dim),\n", + " )\n", + "\n", + " self.layernorm1 = nn.LayerNorm(embedding_dim, eps=1e-6)\n", + " self.layernorm2 = nn.LayerNorm(embedding_dim, eps=1e-6)\n", + " self.dropout1 = nn.Dropout(rate)\n", + " self.dropout2 = nn.Dropout(rate)\n", + "\n", + " def forward(self, inputs_x):\n", + " multi_attention_out, _ = self.multi_attention(inputs_x, inputs_x, inputs_x)\n", + " multi_attention_out = self.dropout1(multi_attention_out)\n", + " out1 = self.layernorm1(inputs_x + multi_attention_out)\n", + "\n", + " ffn_out = self.ffn(out1)\n", + " ffn_out = self.dropout2(ffn_out)\n", + " x = self.layernorm2(out1 + ffn_out)\n", + " return x\n", + "\n", + "\n", + "class TransformerDecoder(nn.Module):\n", + " \"\"\"_summary_\"\"\"\n", + "\n", + " def __init__(self, embedding_dim, num_heads, feed_forward_dim, dropout_rate=0.1):\n", + " super().__init__()\n", + " self.num_heads_ = num_heads\n", + " self.layernorm1 = nn.LayerNorm(embedding_dim, eps=1e-6)\n", + " self.layernorm2 = nn.LayerNorm(embedding_dim, eps=1e-6)\n", + " 
self.layernorm3 = nn.LayerNorm(embedding_dim, eps=1e-6)\n", + " self.decoder_multi_attention = nn.MultiheadAttention(embedding_dim, num_heads)\n", + " self.encoder_multi_attention = nn.MultiheadAttention(embedding_dim, num_heads)\n", + " self.decoder_dropout = nn.Dropout(0.5)\n", + " self.encoder_dropout = nn.Dropout(dropout_rate)\n", + " self.ffn_dropout = nn.Dropout(dropout_rate)\n", + " self.ffn = nn.Sequential(\n", + " nn.Linear(embedding_dim, feed_forward_dim),\n", + " nn.ReLU(),\n", + " nn.Linear(feed_forward_dim, embedding_dim),\n", + " )\n", + "\n", + " def _causal_attention_mask(self, sequence_length, batch_size=1, device=None):\n", + " mask = torch.triu(torch.ones(sequence_length, sequence_length), diagonal=1).to(\n", + " device\n", + " )\n", + " mask = mask.unsqueeze(0).expand(\n", + " batch_size * self.num_heads_, sequence_length, sequence_length\n", + " )\n", + " return mask\n", + "\n", + " def forward(\n", + " self,\n", + " encoder_out,\n", + " src_target_,\n", + " ):\n", + " input_shape = src_target_.size()\n", + " batch_size = 1 # input_shape[0]\n", + " seq_len = input_shape[0]\n", + " x_device = src_target_.device\n", + "\n", + " # Mask\n", + " causal_mask = self._causal_attention_mask(\n", + " sequence_length=seq_len, batch_size=batch_size, device=x_device\n", + " )\n", + "\n", + " target_att, _ = self.decoder_multi_attention(\n", + " src_target_, src_target_, src_target_, attn_mask=causal_mask\n", + " )\n", + " target_norm_out = self.layernorm1(\n", + " src_target_ + self.decoder_dropout(target_att)\n", + " )\n", + "\n", + " encoder_out, _ = self.encoder_multi_attention(\n", + " target_norm_out, encoder_out, encoder_out\n", + " )\n", + " enc_out_norm = self.layernorm2(encoder_out + self.encoder_dropout(encoder_out))\n", + "\n", + " ffn_out = self.ffn(enc_out_norm)\n", + " ffn_out_norm = self.layernorm3(enc_out_norm + self.ffn_dropout(ffn_out))\n", + " return ffn_out_norm\n", + "\n", + "\n", + "class ASLTransformer(nn.Module):\n", + " def __init__(\n", + " self,\n", + " num_hidden_dim=64,\n", + " multi_num_head=8,\n", + " num_feed_forward=128,\n", + " target_maxlen=64,\n", + " num_layers_enc=4,\n", + " num_layers_dec=4,\n", + " ):\n", + " \"\"\"_summary_\n", + "\n", + " Parameters\n", + " ----------\n", + " num_hidden_dim : int, optional\n", + " _description_, by default 64\n", + " multi_num_head : int, optional\n", + " _description_, by default 8\n", + " num_feed_forward : int, optional\n", + " _description_, by default 128\n", + " target_maxlen : int, optional\n", + " _description_, by default 64\n", + " num_layers_enc : int, optional\n", + " _description_, by default 4\n", + " num_layers_dec : int, optional\n", + " _description_, by default 4\n", + " \"\"\"\n", + " super().__init__()\n", + " self.num_layers_enc = num_layers_enc\n", + " self.num_layers_dec = num_layers_dec\n", + " self.target_maxlen = target_maxlen\n", + " self.num_classes = 62\n", + "\n", + " self.encoder_input = LandmarkEmbedding(embedding_dim=num_hidden_dim)\n", + " self.decoder_input = TokenEmbedding(\n", + " num_vocab=self.num_classes,\n", + " embedding_dim=num_hidden_dim,\n", + " maxlen=target_maxlen,\n", + " )\n", + "\n", + " self.encoder = nn.Sequential(\n", + " self.encoder_input,\n", + " *[\n", + " TransformerEncoder(\n", + " embedding_dim=num_hidden_dim,\n", + " num_heads=multi_num_head,\n", + " feed_forward_dim=num_feed_forward,\n", + " )\n", + " for _ in range(num_layers_enc)\n", + " ],\n", + " )\n", + "\n", + " for i in range(num_layers_dec):\n", + " self.add_module(\n", + " 
f\"decoder_layer_{i}\",\n", + " TransformerDecoder(\n", + " embedding_dim=num_hidden_dim,\n", + " num_heads=multi_num_head,\n", + " feed_forward_dim=num_feed_forward,\n", + " ),\n", + " )\n", + "\n", + " self.classifier = nn.Linear(\n", + " in_features=num_hidden_dim, out_features=self.num_classes\n", + " )\n", + "\n", + " def _decoder_run(self, enc_out, target):\n", + " decoder_out = self.decoder_input(target)\n", + " for i in range(self.num_layers_dec):\n", + " decoder_out = getattr(self, f\"decoder_layer_{i}\")(enc_out, decoder_out)\n", + " return decoder_out\n", + "\n", + " def forward(self, source, target):\n", + " if len(source.shape) == 2: # Check if single input\n", + " source = source.unsqueeze(0) # Add batch dimension\n", + " if len(target.shape) == 1: # Check if single input\n", + " target = target.unsqueeze(0) # Add batch dimension\n", + "\n", + " encoder_out = self.encoder(source)\n", + " transformer_output = self._decoder_run(encoder_out, target)\n", + " return self.classifier(transformer_output)\n", + "\n", + " def generate(self, source, target_start_token_idx=60):\n", + " if len(source.shape) == 2: # Check if single input\n", + " source = source.unsqueeze(0) # Add batch dimension\n", + "\n", + " encoder_out = self.encoder(source)\n", + " decoder_input = (\n", + " torch.ones((source.shape[0], 1), dtype=torch.long)\n", + " .to(source.device)\n", + " * target_start_token_idx\n", + " )\n", + " dec_logits = []\n", + "\n", + " for _ in range(self.target_maxlen - 1):\n", + " decoder_out = self._decoder_run(encoder_out, decoder_input)\n", + " logits = self.classifier(decoder_out)\n", + "\n", + " logits = torch.argmax(logits, dim=-1, keepdim=True)\n", + " last_logit = logits[:, -1]\n", + " dec_logits.append(last_logit)\n", + " decoder_input = torch.cat([decoder_input, last_logit], dim=-1)\n", + "\n", + " return decoder_input.squeeze(0) if len(source.shape) == 2 else decoder_input" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2023-12-11 03:21:41.460\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m24\u001b[0m - \u001b[31m\u001b[1m ERROR Message ==> Expected 2D (unbatched) or 3D (batched) input to conv1d, but got input of size: [1, 1, 128, 345]\u001b[0m\n", + "\u001b[33m\u001b[1mTraceback (most recent call last):\u001b[0m\n", + "\n", + " File \"\", line 198, in _run_module_as_main\n", + " File \"\", line 88, in _run_code\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel_launcher.py\", line 17, in \n", + " app.launch_new_instance()\n", + " │ └ >\n", + " └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\kernelapp.py\", line 724, in start\n", + " self.io_loop.start()\n", + " │ │ └ \n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tornado\\platform\\asyncio.py\", line 215, in start\n", + " self.asyncio_loop.run_forever()\n", + " │ │ └ \n", + " │ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>\n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 607, in run_forever\n", + " self._run_once()\n", + " │ └ \n", + " └ <_WindowsSelectorEventLoop running=True closed=False debug=False>\n", + " File 
\"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\base_events.py\", line 1922, in _run_once\n", + " handle._run()\n", + " │ └ \n", + " └ , ...],))>)>\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\asyncio\\events.py\", line 80, in _run\n", + " self._context.run(self._callback, *self._args)\n", + " │ │ │ │ │ └ \n", + " │ │ │ │ └ , ...],))>)>\n", + " │ │ │ └ \n", + " │ │ └ , ...],))>)>\n", + " │ └ \n", + " └ , ...],))>)>\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\kernelbase.py\", line 512, in dispatch_queue\n", + " await self.process_one()\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\kernelbase.py\", line 501, in process_one\n", + " await dispatch(*args)\n", + " │ └ ([, , >\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\kernelbase.py\", line 408, in dispatch_shell\n", + " await result\n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\kernelbase.py\", line 731, in execute_request\n", + " reply_content = await reply_content\n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\ipkernel.py\", line 417, in do_execute\n", + " res = shell.run_cell(\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\ipykernel\\zmqshell.py\", line 540, in run_cell\n", + " return super().run_cell(*args, **kwargs)\n", + " │ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/c%3A/Main%20Workspace/Cohort8-Ransom-Kuti-Ladipo/l...\n", + " └ ('# Create a sample input\\nbatch_source_sequence = torch.randn(2, 128, 345) # Sample source sequence (batch_size, maxlen, nu...\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 2945, in run_cell\n", + " result = self._run_cell(\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 3000, in _run_cell\n", + " return runner(coro)\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\async_helpers.py\", line 129, in _pseudo_sync_runner\n", + " coro.send(None)\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 3203, in run_cell_async\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", + " │ │ │ │ └ 'C:\\\\Users\\\\Yinka\\\\AppData\\\\Local\\\\Temp\\\\ipykernel_3812\\\\4012752373.py'\n", + " │ │ │ └ [, , \n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 3382, in run_ast_nodes\n", + " if await self.run_code(code, result, async_=asy):\n", + " │ │ │ │ └ False\n", + " │ │ │ └ at 0x0000017533D041B0, file \"C:\\Users\\Yinka\\AppData\\Local\\Temp\\ipykernel_3812\\4012752373.py\", line 1>\n", + " │ └ \n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 3442, in 
run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " │ │ │ │ └ {'__name__': '__main__', '__doc__': '\\nBaseline Transformer Module\\n\\nThis module contains the implementation of a Transforme...\n", + " │ │ │ └ \n", + " │ │ └ \n", + " │ └ \n", + " └ at 0x0000017533D041B0, file \"C:\\Users\\Yinka\\AppData\\Local\\Temp\\ipykernel_3812\\4012752373.py\", line 1>\n", + "\n", + "> File \"\u001b[32mC:\\Users\\Yinka\\AppData\\Local\\Temp\\ipykernel_3812\\\u001b[0m\u001b[32m\u001b[1m4012752373.py\u001b[0m\", line \u001b[33m18\u001b[0m, in \u001b[35m\u001b[0m\n", + " \u001b[1mpredictions\u001b[0m \u001b[35m\u001b[1m=\u001b[0m \u001b[1mtransformer_model\u001b[0m\u001b[1m(\u001b[0m\u001b[1msingle_src_seq\u001b[0m\u001b[1m,\u001b[0m \u001b[1msingle_trg_seq\u001b[0m\u001b[1m)\u001b[0m\n", + " \u001b[36m │ │ └ \u001b[0m\u001b[36m\u001b[1mtensor([53, 3, 20, 29, 50, 25, 29, 13, 15, 32, 10, 15, 32, 36, 23, 50, 53, 5,\u001b[0m\n", + " \u001b[36m │ │ \u001b[0m\u001b[36m\u001b[1m 43, 37, 41, 58, 30, 12, 6, 40, 31, 4...\u001b[0m\n", + " \u001b[36m │ └ \u001b[0m\u001b[36m\u001b[1mtensor([[0.4061, 0.6164, 0.6337, ..., 0.5066, 0.4483, 0.4846],\u001b[0m\n", + " \u001b[36m │ \u001b[0m\u001b[36m\u001b[1m [0.0074, 0.1099, 0.6021, ..., 0.5536, 0.7102, 0.4944...\u001b[0m\n", + " \u001b[36m └ \u001b[0m\u001b[36m\u001b[1mASLTransformer(\u001b[0m\n", + " \u001b[36m \u001b[0m\u001b[36m\u001b[1m (encoder_input): LandmarkEmbedding(\u001b[0m\n", + " \u001b[36m \u001b[0m\u001b[36m\u001b[1m (conv1): Conv1d(1, 64, kernel_size=(11,), stride=(2,), padding=(5,)...\u001b[0m\n", + "\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1501, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " │ │ └ {}\n", + " │ └ (tensor([[0.4061, 0.6164, 0.6337, ..., 0.5066, 0.4483, 0.4846],\n", + " │ [0.0074, 0.1099, 0.6021, ..., 0.5536, 0.7102, 0.494...\n", + " └ \u001b[0m\n", + " \u001b[36m └ \u001b[0m\u001b[36m\u001b[1m\u001b[0m\n", + "\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1501, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " │ │ └ {}\n", + " │ └ (tensor([[[[0.4061, 0.6164, 0.6337, ..., 0.5066, 0.4483, 0.4846],\n", + " │ [0.0074, 0.1099, 0.6021, ..., 0.5536, 0.7102, 0...\n", + " └ \n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\conv.py\", line 313, in forward\n", + " return self._conv_forward(input, self.weight, self.bias)\n", + " │ │ │ │ └ Conv1d(1, 64, kernel_size=(11,), stride=(2,), padding=(5,))\n", + " │ │ │ └ Conv1d(1, 64, kernel_size=(11,), stride=(2,), padding=(5,))\n", + " │ │ └ tensor([[[[0.4061, 0.6164, 0.6337, ..., 0.5066, 0.4483, 0.4846],\n", + " │ │ [0.0074, 0.1099, 0.6021, ..., 0.5536, 0.7102, 0....\n", + " │ └ \n", + " └ Conv1d(1, 64, kernel_size=(11,), stride=(2,), padding=(5,))\n", + " File \"c:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\conv.py\", line 309, in _conv_forward\n", + " return F.conv1d(input, weight, bias, self.stride,\n", + " │ │ │ │ │ │ └ (2,)\n", + " │ │ │ │ │ └ Conv1d(1, 64, kernel_size=(11,), stride=(2,), padding=(5,))\n", + " │ │ │ │ └ Parameter containing:\n", + " │ │ │ │ tensor([-0.1697, -0.2340, -0.0869, 0.0877, -0.2328, 0.2527, -0.2036, 0.0177,\n", + " │ │ │ │ -0.2673, -0.069...\n", + " │ │ │ └ Parameter containing:\n", + " │ │ │ tensor([[[-0.1422, 
-0.0277, 0.0092, -0.0871, 0.2008, -0.2052, -0.2548,\n", + " │ │ │ 0.0125, 0.0531, 0...\n", + " │ │ └ tensor([[[[0.4061, 0.6164, 0.6337, ..., 0.5066, 0.4483, 0.4846],\n", + " │ │ [0.0074, 0.1099, 0.6021, ..., 0.5536, 0.7102, 0....\n", + " │ └ \n", + " └ {error}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([128, 346])\n" + ] + } + ], + "source": [ + "import torch\n", + "\n", + "# Your original sequence tensor\n", + "original_tensor = torch.randn(118, 346)\n", + "\n", + "# Define the desired output shape\n", + "desired_shape = (128, 346)\n", + "\n", + "# Calculate the padding on the first dimension from the bottom\n", + "padding_bottom = max(0, desired_shape[0] - original_tensor.size(0))\n", + "\n", + "# Pad the tensor along the first dimension from the bottom\n", + "padded_tensor = torch.nn.functional.pad(original_tensor, (0, 0, 0, padding_bottom))\n", + "\n", + "# Now, padded_tensor has the shape (128, 346)\n", + "print(padded_tensor.shape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 0.0490, 1.5782, -0.0793, -0.8889, -0.6999],\n", + " [ 0.3881, 1.1002, -0.7594, -1.0423, 1.1450],\n", + " [ 2.1911, 0.6852, 0.7096, -1.1343, -0.3205]])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "original_tensor[115:,:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 0.0490, 1.5782, -0.0793, -0.8889, -0.6999],\n", + " [ 0.3881, 1.1002, -0.7594, -1.0423, 1.1450],\n", + " [ 2.1911, 0.6852, 0.7096, -1.1343, -0.3205],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],\n", + " [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "padded_tensor[115:,:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c3a78ff9241405f0630be2ccd4e6fc6c2b27be89 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 06:09:07 +0100 Subject: [PATCH 12/16] [add] added frame preprocess test --- linguify_yb/src/tests/test_data_ingestion.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/linguify_yb/src/tests/test_data_ingestion.py b/linguify_yb/src/tests/test_data_ingestion.py index b4e86067..e28f43ab 100644 --- a/linguify_yb/src/tests/test_data_ingestion.py +++ b/linguify_yb/src/tests/test_data_ingestion.py @@ -1,8 +1,18 @@ 
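[Editor's note — the sketch below is an annotation, not part of the patch.] The hunk that follows swaps the placeholder test for a parametrized check of `clean_frames_process`. As a minimal illustration of the contract it asserts — assuming the same `src.dataset.preprocess` import path used in the test — the function is expected to map any `(num_frames, 345)` landmark tensor to a fixed `(128, 345)` tensor, truncating long clips and zero-padding short ones:

```python
# Editorial sketch only; mirrors the shape contract the new test asserts.
import torch

from src.dataset.preprocess import clean_frames_process  # import path as used in the test below

for num_frames in (10, 128, 750):          # shorter than, equal to, and longer than FRAME_LEN
    frames = torch.randn(num_frames, 345)  # 345 = left hand (63) + right hand (63) + pose (99) + lip (120)
    out = clean_frames_process(frames)
    assert out.shape == (128, 345)         # truncated or zero-padded to FRAME_LEN frames
```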
"doc" -import torch import pytest +import torch +from torch.utils.data import DataLoader +from src.dataset.frames_config import FRAME_LEN +from src.dataset.preprocess import clean_frames_process + -def test_data_columns(): - pass +@pytest.mark.parametrize( + "frames", + [torch.randn(num_frames, 345) for num_frames in [10, 108, 128, 156, 750, 420]], +) +def test_frames_preprocess(frames): + clean_frames = clean_frames_process(frames) + expected_output_shape = (128, 345) + assert expected_output_shape == clean_frames.shape From f92ba03f4a11920d619364c9ded239066d2f428c Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 06:09:19 +0100 Subject: [PATCH 13/16] [add] updates --- linguify_yb/src/tests/test_model.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/linguify_yb/src/tests/test_model.py b/linguify_yb/src/tests/test_model.py index c1206d1c..aca36478 100644 --- a/linguify_yb/src/tests/test_model.py +++ b/linguify_yb/src/tests/test_model.py @@ -7,6 +7,7 @@ from torch.utils.data import DataLoader from src.models.baseline_transformer import ASLTransformer + @pytest.fixture def baseline_model(): """_summary_""" @@ -24,7 +25,8 @@ def test_baseline_transformer_output_shape(baseline_model, inputs_x, target_y): expected_output_shape = (64, 62) assert output.shape == expected_output_shape -@pytest.mark.parametrize("inputs_x", [(torch.randn(128,345)), (torch.randn(128,345))]) + +@pytest.mark.parametrize("inputs_x", [(torch.randn(128, 345)), (torch.randn(128, 345))]) def test_baseline_transformer_generate_out( baseline_model, inputs_x, @@ -35,5 +37,21 @@ def test_baseline_transformer_generate_out( expected_output_len = 64 assert len(output) == expected_output_len -@pytest.mark.parametrize("input_shape", [(batch_size, input_dim) for batch_size in [1, 2, 5] for input_dim in [32, 64, 128]]) -def \ No newline at end of file + +@pytest.mark.parametrize( + "inputs_x, target_y, batch_size", + [ + ( + torch.randn(batch_size, 128, 345), + torch.randint(0, 60, (batch_size, 64)), + batch_size, + ) + for batch_size in [1, 2, 4, 8] + ], +) +def test_baseline_transformer_batch_shape( + baseline_model, inputs_x, target_y, batch_size +): + output = baseline_model(inputs_x, target_y) + expected_output_shape = (batch_size, 64, 62) + assert output.shape == expected_output_shape From 28101b8dfed25be41250a2275639542b3a87c699 Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Mon, 11 Dec 2023 06:10:01 +0100 Subject: [PATCH 14/16] [add] mics --- linguify_yb/src/dataset/frames_config.py | 94 +++++++++++++++--------- linguify_yb/src/dataset/preprocess.py | 91 +++++------------------ 2 files changed, 79 insertions(+), 106 deletions(-) diff --git a/linguify_yb/src/dataset/frames_config.py b/linguify_yb/src/dataset/frames_config.py index 98fb2d3b..a5b94ef1 100644 --- a/linguify_yb/src/dataset/frames_config.py +++ b/linguify_yb/src/dataset/frames_config.py @@ -1,43 +1,71 @@ """doc """ -FRAME_LEN = 128 -LPOSE = [13, 15, 17, 19, 21] -RPOSE = [14, 16, 18, 20, 22] -POSE = LPOSE + RPOSE +FRAME_LEN = 128 +LIP = [ + 61, + 185, + 40, + 39, + 37, + 0, + 267, + 269, + 270, + 409, + 291, + 146, + 91, + 181, + 84, + 17, + 314, + 405, + 321, + 375, + 78, + 191, + 80, + 81, + 82, + 13, + 312, + 311, + 310, + 415, + 95, + 88, + 178, + 87, + 14, + 317, + 402, + 318, + 324, + 308, +] -X = ( +FRAME = ["frame"] +N_LHAND = ( + [f"x_left_hand_{i}" for i in range(21)] + + [f"y_left_hand_{i}" for i in range(21)] + + [f"z_left_hand_{i}" for i in range(21)] +) +N_RHAND = ( [f"x_right_hand_{i}" for 
i in range(21)] - + [f"x_left_hand_{i}" for i in range(21)] - + [f"x_pose_{i}" for i in POSE] + + [f"y_right_hand_{i}" for i in range(21)] + + [f"z_right_hand_{i}" for i in range(21)] ) -Y = ( - [f"y_right_hand_{i}" for i in range(21)] - + [f"y_left_hand_{i}" for i in range(21)] - + [f"y_pose_{i}" for i in POSE] +N_POSE = ( + [f"x_pose_{i}" for i in range(33)] + + [f"y_pose_{i}" for i in range(33)] + + [f"z_pose_{i}" for i in range(33)] ) -Z = ( - [f"z_right_hand_{i}" for i in range(21)] - + [f"z_left_hand_{i}" for i in range(21)] - + [f"z_pose_{i}" for i in POSE] +N_FACE = ( + [f"x_face_{i}" for i in LIP] + + [f"y_face_{i}" for i in LIP] + + [f"z_face_{i}" for i in LIP] ) -FEATURE_COLUMNS = X + Y + Z - -X_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if "x_" in col] -Y_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if "y_" in col] -Z_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if "z_" in col] - -RHAND_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if "right" in col] -LHAND_IDX = [i for i, col in enumerate(FEATURE_COLUMNS) if "left" in col] -RPOSE_IDX = [ - i - for i, col in enumerate(FEATURE_COLUMNS) - if "pose" in col and int(col[-2:]) in RPOSE -] -LPOSE_IDX = [ - i - for i, col in enumerate(FEATURE_COLUMNS) - if "pose" in col and int(col[-2:]) in LPOSE -] +FEATURE_COLUMNS = FRAME + N_LHAND + N_RHAND + N_POSE + N_FACE diff --git a/linguify_yb/src/dataset/preprocess.py b/linguify_yb/src/dataset/preprocess.py index 9339f071..21a67e64 100644 --- a/linguify_yb/src/dataset/preprocess.py +++ b/linguify_yb/src/dataset/preprocess.py @@ -3,83 +3,28 @@ import torch from torch.nn import functional as F -from linguify_yb.src.dataset.frames_config import (FRAME_LEN, LHAND_IDX, - LPOSE_IDX, RHAND_IDX, - RPOSE_IDX) +# from dataset.frames_config import FRAME_LEN # TODO Clean up code, add comments and docs # TODO remove print and debug statements -# Preprocess frame - -def resize_pad(x): - if x.shape[0] < FRAME_LEN: - x = F.pad(x, (0, 0, 0, FRAME_LEN - x.shape[0], 0, 0)) - else: - x = x.unsqueeze(0) # Add batch and channel dimensions - x = torch.nn.functional.interpolate( - x, size=(FRAME_LEN, x.shape[1]), mode="bilinear", align_corners=False - ).squeeze(0) - - return x - - -def frames_preprocess(x): - x = torch.tensor(x) - rhand = x[:, RHAND_IDX] - lhand = x[:, LHAND_IDX] - rpose = x[:, RPOSE_IDX] - lpose = x[:, LPOSE_IDX] - - rnan_idx = torch.any(torch.isnan(rhand), dim=1) - lnan_idx = torch.any(torch.isnan(lhand), dim=1) - - rnans = torch.sum(rnan_idx) - lnans = torch.sum(lnan_idx) - - if rnans > lnans: - hand = lhand - pose = lpose - - hand_x = hand[:, 0 * (len(LHAND_IDX) // 3) : 1 * (len(LHAND_IDX) // 3)] - hand_y = hand[:, 1 * (len(LHAND_IDX) // 3) : 2 * (len(LHAND_IDX) // 3)] - hand_z = hand[:, 2 * (len(LHAND_IDX) // 3) : 3 * (len(LHAND_IDX) // 3)] - hand = torch.cat([1 - hand_x, hand_y, hand_z], dim=1) - - pose_x = pose[:, 0 * (len(LPOSE_IDX) // 3) : 1 * (len(LPOSE_IDX) // 3)] - pose_y = pose[:, 1 * (len(LPOSE_IDX) // 3) : 2 * (len(LPOSE_IDX) // 3)] - pose_z = pose[:, 2 * (len(LPOSE_IDX) // 3) : 3 * (len(LPOSE_IDX) // 3)] - pose = torch.cat([1 - pose_x, pose_y, pose_z], dim=1) - else: - hand = rhand - pose = rpose - - hand_x = hand[:, 0 * (len(LHAND_IDX) // 3) : 1 * (len(LHAND_IDX) // 3)] - hand_y = hand[:, 1 * (len(LHAND_IDX) // 3) : 2 * (len(LHAND_IDX) // 3)] - hand_z = hand[:, 2 * (len(LHAND_IDX) // 3) : 3 * (len(LHAND_IDX) // 3)] - hand = torch.cat( - [hand_x.unsqueeze(-1), hand_y.unsqueeze(-1), hand_z.unsqueeze(-1)], dim=-1 - ) - - mean = torch.mean(hand, dim=1).unsqueeze(1) 
- std = torch.std(hand, dim=1).unsqueeze(1) - hand = (hand - mean) / std - - pose_x = pose[:, 0 * (len(LPOSE_IDX) // 3) : 1 * (len(LPOSE_IDX) // 3)] - pose_y = pose[:, 1 * (len(LPOSE_IDX) // 3) : 2 * (len(LPOSE_IDX) // 3)] - pose_z = pose[:, 2 * (len(LPOSE_IDX) // 3) : 3 * (len(LPOSE_IDX) // 3)] - pose = torch.cat( - [pose_x.unsqueeze(-1), pose_y.unsqueeze(-1), pose_z.unsqueeze(-1)], dim=-1 - ) - - x = torch.cat([hand, pose], dim=1) - #print(f"befor re{x.shape}") - x = resize_pad(x) - #print(f"after re{x.shape}") +def clean_frames_process( + x, max_frame_len=128, n_hand_landmarks=21, n_pose_landmarks=33, n_face_landmarks=40 +): + x = x[:max_frame_len] x = torch.where(torch.isnan(x), torch.zeros_like(x), x) - #print(x.shape) - - #! CRITICAL Debug - # x = x.view(FRAME_LEN, len(LHAND_IDX) + len(LPOSE_IDX)) + n_frames = x.size(0) + lhand = x[:, 0:63].view(n_frames, 3, n_hand_landmarks).transpose(1, 2) + rhand = x[:, 63:126].view(n_frames, 3, n_hand_landmarks).transpose(1, 2) + pose = x[:, 126:225].view(n_frames, 3, n_pose_landmarks).transpose(1, 2) + face = x[:, 225:345].view(n_frames, 3, n_face_landmarks).transpose(1, 2) + + x = torch.cat([lhand, rhand, pose, face], axis=1) + x = x.view(n_frames, 345) + if n_frames < max_frame_len: + # Calculate the padding on the first dimension from the bottom + padding_bottom = max(0, max_frame_len - x.size(0)) + # Pad the tensor along the first dimension from the bottom + x = F.pad(x, (0, 0, 0, padding_bottom)) return x From 5383afe3e9b8677fde9a82f0b4a4ab57e065a9cb Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Tue, 12 Dec 2023 17:28:56 +0100 Subject: [PATCH 15/16] updates --- linguify_yb/development/code_dev.ipynb | 44 +++++- linguify_yb/development/dev.ipynb | 3 +- linguify_yb/src/benchmark.py | 7 + linguify_yb/src/config.py | 6 + linguify_yb/src/dataset/dataset_loader.py | 119 +++++++++++++++ linguify_yb/src/dataset/frames_config.py | 1 + linguify_yb/src/dataset/preprocess.py | 2 - linguify_yb/src/dev_data.py | 123 +++++++++++++++ linguify_yb/src/main.py | 101 +++++++++++++ .../src/models/baseline_transformer.py | 20 +-- linguify_yb/src/models/model_loader.py | 31 +--- linguify_yb/src/models/static_transfromer.py | 0 linguify_yb/src/tests/test_data_ingestion.py | 16 ++ linguify_yb/src/trainer.py | 141 ++++++++++++++++++ linguify_yb/src/utils/util.py | 77 ++++++++++ version.txt | 0 16 files changed, 650 insertions(+), 41 deletions(-) create mode 100644 linguify_yb/src/config.py create mode 100644 linguify_yb/src/dataset/dataset_loader.py create mode 100644 linguify_yb/src/dev_data.py create mode 100644 linguify_yb/src/main.py create mode 100644 linguify_yb/src/models/static_transfromer.py create mode 100644 linguify_yb/src/trainer.py create mode 100644 linguify_yb/src/utils/util.py create mode 100644 version.txt diff --git a/linguify_yb/development/code_dev.ipynb b/linguify_yb/development/code_dev.ipynb index a41f02c2..251ebeaf 100644 --- a/linguify_yb/development/code_dev.ipynb +++ b/linguify_yb/development/code_dev.ipynb @@ -129,12 +129,54 @@ "]\n" ] }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(63, 63)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(RHAND_IDX), len(LHAND_IDX)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def read_file(file, file_id, landmarks_metadata_path):\n", + " phrase_list = []\n", + " frames_list = []\n", + 
" metadata_train_dataframe = pd.read_csv(landmarks_metadata_path)\n", + " file_id_df = metadata_train_dataframe.loc[\n", + " metadata_train_dataframe[\"file_id\"] == file_id\n", + " ]\n", + " saved_parueat_df = pq.read_table(\n", + " file, columns=[\"sequence_id\"] + FEATURE_COLUMNS\n", + " ).to_pandas()\n", + " for seq_id, phrase in zip(file_id_df.sequence_id, file_id_df.phrase):\n", + " frames = saved_parueat_df[saved_parueat_df.index == seq_id].to_numpy()\n", + " # NaN\n", + " right_num_nan = np.sum(np.sum(np.isnan(frames[:, RHAND_IDX]), axis=1) == 0)\n", + " left_num_nan = np.sum(np.sum(np.isnan(frames[:, LHAND_IDX]), axis=1) == 0)\n", + " \n", + " total_num_nan = max(right_num_nan, left_num_nan)\n", + " if 2 * len(phrase) < total_num_nan:\n", + " frames_list.append(frames)\n", + " phrase_list.append(phrase)\n", + " return (frames_list, phrase_list)\n" + ] } ], "metadata": { diff --git a/linguify_yb/development/dev.ipynb b/linguify_yb/development/dev.ipynb index 9224fe47..07faeb77 100644 --- a/linguify_yb/development/dev.ipynb +++ b/linguify_yb/development/dev.ipynb @@ -42,8 +42,7 @@ "metadata": {}, "outputs": [], "source": [ - "ans = [(torch.randn(batch_size, 128, 345), torch.randint(0, 60, (batch_size, 64)), batch_size) \n", - " for batch_size in [1,2,4,8]]" + "torch.is_distri" ] }, { diff --git a/linguify_yb/src/benchmark.py b/linguify_yb/src/benchmark.py index 4c710533..9b83888a 100644 --- a/linguify_yb/src/benchmark.py +++ b/linguify_yb/src/benchmark.py @@ -11,10 +11,15 @@ class BenchMarker: + """_summary_""" + def __init__(self) -> None: pass def get_model_macs(self, model, inputs=None) -> int: + """ + calculate the MACS of a model + """ return profile_macs(model, inputs) def get_model_sparsity(self, model: nn.Module) -> float: @@ -56,3 +61,5 @@ def runner(self, model): model_sparsity = self.get_model_sparsity(model) model_num_params = self.get_num_parameters(model) model_size = self.get_model_size(model) + + return diff --git a/linguify_yb/src/config.py b/linguify_yb/src/config.py new file mode 100644 index 00000000..98a1d336 --- /dev/null +++ b/linguify_yb/src/config.py @@ -0,0 +1,6 @@ +"""doc +""" +from pydantic import BaseModel + +class Data(BaseModel): + \ No newline at end of file diff --git a/linguify_yb/src/dataset/dataset_loader.py b/linguify_yb/src/dataset/dataset_loader.py new file mode 100644 index 00000000..947531f8 --- /dev/null +++ b/linguify_yb/src/dataset/dataset_loader.py @@ -0,0 +1,119 @@ +"""doc +""" + +import json + +import numpy as np +import pandas as pd +import pyarrow.parquet as pq +import torch +from torch.nn import functional as F +from torch.utils.data import DataLoader, Dataset + +from dataset.frames_config import FEATURE_COLUMNS, FRAME_LEN, LHAND_IDX, RHAND_IDX +from dataset.preprocess import clean_frames_process + +PHRASE_PATH = "/kaggle/input/asl-fingerspelling/character_to_prediction_index.json" +METADATA = "/kaggle/input/asl-fingerspelling/train.csv" + +with open(PHRASE_PATH, "r", encoding="utf-8") as f: + character_to_num = json.load(f) + +PAD_TOKEN = "P" +START_TOKEN = "<" +END_TOKEN = ">" +PAD_TOKEN_IDX = 59 +START_TOKEN_IDX = 60 +END_TOKEN_IDX = 61 + +character_to_num[PAD_TOKEN] = PAD_TOKEN_IDX +character_to_num[START_TOKEN] = START_TOKEN_IDX +character_to_num[END_TOKEN] = END_TOKEN_IDX +num_to_character = {j: i for i, j in character_to_num.items()} + + +class TokenHashTable: + def __init__( + self, word2index_mapping=character_to_num, index2word_mapping=num_to_character + ): + self.word2index = word2index_mapping + self.index2word = 
index2word_mapping + + def _indexesfromsentence(self, sentence): + return [self.word2index[word] for word in sentence] + + def tensorfromsentence(self, sentence): + indexes = self._indexesfromsentence(sentence) + return torch.tensor(indexes, dtype=torch.long) # .view(1, -1) + + def indexes_to_sentence(self, indexes_list): + if torch.is_tensor(indexes_list): + indexes_list = indexes_list.tolist() + words = [self.index2word[idx] for idx in indexes_list] + return words + + +def read_file(file, file_id, landmarks_metadata_path): + phrase_list = [] + frames_list = [] + metadata_train_dataframe = pd.read_csv(landmarks_metadata_path) + file_id_df = metadata_train_dataframe.loc[ + metadata_train_dataframe["file_id"] == file_id + ] + saved_parueat_df = pq.read_table( + file, columns=["sequence_id"] + FEATURE_COLUMNS + ).to_pandas() + for seq_id, phrase in zip(file_id_df.sequence_id, file_id_df.phrase): + frames = saved_parueat_df[saved_parueat_df.index == seq_id].to_numpy() + # NaN + frames_list.append(torch.tensor(frames)) + phrase_list.append(phrase) + return (frames_list, phrase_list) + + +class LandmarkDataset(Dataset): + def __init__(self, file_path, file_id, table, transform=True): + self.landmarks_metadata_path = METADATA + self.frames, self.labels = read_file( + file_path, file_id, self.landmarks_metadata_path + ) + self.trans = transform + self.table = table + + def _label_pre(self, label_sample): + sample = START_TOKEN + label_sample + END_TOKEN + new_phrase = self.table.tensorfromsentence(list(sample)) + ans = F.pad( + input=new_phrase, + pad=[0, 64 - new_phrase.shape[0]], + mode="constant", + value=PAD_TOKEN_IDX, + ) + return ans + + def __len__(self): + return len(self.labels) + + def __getitem__(self, idx): + if torch.is_tensor(idx): + idx = idx.tolist() + phrase = self.labels[idx] + frames = self.frames[idx] + + if self.trans: + phrase = self._label_pre(phrase) + frames = clean_frames_process(frames) + return frames, phrase + + +def get_dataloader(file_path, file_id, batch_size=32, num_workers_=1): + lookup_table = TokenHashTable(character_to_num, num_to_character) + dataset = LandmarkDataset(file_path, file_id, lookup_table, transform=True) + + dataloader = DataLoader( + dataset, + batch_size=batch_size, + num_workers=num_workers_, + pin_memory=True, + ) + return dataloader diff --git a/linguify_yb/src/dataset/frames_config.py b/linguify_yb/src/dataset/frames_config.py index a5b94ef1..2f277968 100644 --- a/linguify_yb/src/dataset/frames_config.py +++ b/linguify_yb/src/dataset/frames_config.py @@ -52,6 +52,7 @@ + [f"y_left_hand_{i}" for i in range(21)] + [f"z_left_hand_{i}" for i in range(21)] ) + N_RHAND = ( [f"x_right_hand_{i}" for i in range(21)] + [f"y_right_hand_{i}" for i in range(21)] diff --git a/linguify_yb/src/dataset/preprocess.py b/linguify_yb/src/dataset/preprocess.py index 21a67e64..192dd73a 100644 --- a/linguify_yb/src/dataset/preprocess.py +++ b/linguify_yb/src/dataset/preprocess.py @@ -3,8 +3,6 @@ import torch from torch.nn import functional as F -# from dataset.frames_config import FRAME_LEN - # TODO Clean up code, add comments and docs # TODO remove print and debug statements diff --git a/linguify_yb/src/dev_data.py b/linguify_yb/src/dev_data.py new file mode 100644 index 00000000..a861d8b0 --- /dev/null +++ b/linguify_yb/src/dev_data.py @@ -0,0 +1,123 @@ +"""Dataset Download Module + +This module provides functions to download the a subsample of Google ASL dataset. 
+ +Functions: +- download_dataset(url: str, destination: str, path): + Downloads a dataset from the given URL to the specified destination directory. +- main - the main function to run the script +""" + + +import os +import shutil +import subprocess +import zipfile + +from utils.logger_util import logger + +DATA_DIR = "data/asl-fingerspelling/" +data_files = ["train.csv", "character_to_prediction_index.json"] +train_landmarks = ["1019715464.parquet", "1021040628.parquet", "105143404.parquet"] +TRAIN_LANDMARKS_DIR = "train_landmarks/" + +COMMAND = [ + "kaggle", + "competitions", + "download", + "-c", + "asl-fingerspelling", + "-f", + "FILE", + "-p", + "data/raw/", +] + + +def check_storage(project_dir=os.getcwd()): + """check and return availabe storage space + + Parameters + ---------- + directory_path : str, Path + current working directory/directory path + + Returns + ------- + int + the size of available storage space (GB) + + Raises + ------ + StorageFullError + exception for when storage is full. + """ + total, used, free = shutil.disk_usage(project_dir) + total_size_gb = round(total / (2**30), 2) + used_size_gb = round(used / (2**30), 2) + free_size_gb = round(free / (2**30), 2) + if used_size_gb / total_size_gb >= 0.8: + raise StorageFullError + return free_size_gb + + +class StorageFullError(Exception): + """Custom exception for when storage is full.""" + + pass + + +def downlaod_file(cmd, unzipped_file_path, data_dir): + """Download file using kaggle API + + Parameters + ---------- + cmd : list + Kaggle API Commands + unzipped_file : str, Path + path of the unzipped file + data_dir : str, Path + the directory where the data should be downloaded into + """ + subprocess.run(cmd, check=True, text=True) + if ( + os.path.exists(unzipped_file_path) + and os.path.splitext(unzipped_file_path)[1].lower() == ".zip" + ): + # Unzipping and delete the zipped file to free storage + with zipfile.ZipFile(unzipped_file_path, "r") as zip_ref: + zip_ref.extractall(data_dir) + os.remove(unzipped_file_path) + else: + pass + + +def main(): + """the main function to run the script""" + logger.info("Commencing downloading the dataset") + try: + logger.info(f"Current Available space {check_storage()}GB") + for file in data_files: + logger.info(f"Downloading{file} in {DATA_DIR}") + COMMAND[6] = file + unzipfile_path = DATA_DIR + file + ".zip" + downlaod_file(COMMAND, unzipfile_path, DATA_DIR) + logger.info(f" {file} downloaded succesful") + # Downloading the LANDMARKS files + for parquet_file in train_landmarks: + logger.info(f"Current Available space {check_storage()}GB") + file_path = TRAIN_LANDMARKS_DIR + parquet_file + COMMAND[6] = file_path + COMMAND[8] = DATA_DIR + TRAIN_LANDMARKS_DIR + unzipfile_path = DATA_DIR + file_path + ".zip" + downlaod_file(COMMAND, unzipfile_path, DATA_DIR + TRAIN_LANDMARKS_DIR) + logger.info(f"{parquet_file} downloaded succesfully") + + logger.success("All files downloaded succesfully") + + except Exception as error: + logger.exception(f"Data unloading was unsuccesfully due to {error}") + + +if __name__ == "__main__": + main() diff --git a/linguify_yb/src/main.py b/linguify_yb/src/main.py new file mode 100644 index 00000000..ac12cfc8 --- /dev/null +++ b/linguify_yb/src/main.py @@ -0,0 +1,101 @@ +""" +doc + +# Usage: +# python -m src/train.py \ +# --epochs 10 \ +# --batch 512 \ +""" +# TODO Complete and refactor code for distributed training + +import os +import json + +import numpy as np +import torch +import wandb +from torch import nn + +from utils.util import 
get_device_strategy, parse_args, set_seed
+from utils.logger_util import logger
+from models.model_loader import ModelLoader
+from dataset.dataset_loader import get_dataloader
+import trainer
+
+try:
+    dataset_paths = "data/dev_samples.json"  # On Kaggle replace with "data/dataset_paths.json" to train on the full data
+    with open(dataset_paths, "r", encoding="utf-8") as json_file:
+        data_dict = json.load(json_file)
+    LANDMARK_DIR = "/kaggle/input/asl-fingerspelling/train_landmarks"
+    MODEL_DIR = "model.pt"
+
+    # Training dataset
+    train_dataset = data_dict["train_files"]
+    train_file_ids = [os.path.basename(file) for file in train_dataset]
+    train_file_ids = [
+        int(file_name.replace(".parquet", "")) for file_name in train_file_ids
+    ]
+    assert len(train_dataset) == len(
+        train_file_ids
+    ), "Failed import of train files paths"
+    TRAIN_DS_FILES = list(zip(train_dataset, train_file_ids))
+
+    # Validation dataset
+    valid_dataset = data_dict["valid_files"]
+    valid_file_ids = [os.path.basename(file) for file in valid_dataset]
+    valid_file_ids = [
+        int(file_name.replace(".parquet", "")) for file_name in valid_file_ids
+    ]
+    assert len(valid_dataset) == len(
+        valid_file_ids
+    ), "Failed import of valid files paths"
+    VALID_DS_FILES = list(zip(valid_dataset, valid_file_ids))
+except AssertionError as assert_error:
+    logger.exception(f"Loading the dataset file lists failed: {assert_error}")
+
+
+def main(arg):
+    logger.info(f"Starting training on {arg.model}")
+    # To ensure reproducibility of the training process
+    set_seed()
+    DEVICE = get_device_strategy(tpu=arg.tpu)
+    logger.info(f"Training on {DEVICE} for {arg.epochs} epochs.")
+
+    model = ModelLoader().get_model(arg.model)
+
+    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
+    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
+
+    # Optimizes the given model/function using TorchDynamo and the specified backend
+    model = torch.compile(model)
+
+    logger.info("Starting the training loop")
+    wandb.init(
+        project="ASL-project",
+        config={
+            "learning_rate": 0.01,
+            "architecture": "Test Model",
+            "dataset": "Google ASL Landmarks",
+            "epochs": arg.epochs,
+        },
+    )
+
+    wandb.watch(model)
+    try:
+        trainer.train(
+            model=model,
+            optim=optimizer,
+            loss_func=criterion,
+            n_epochs=arg.epochs,
+            batch=arg.batch,
+            device=DEVICE,
+        )
+        logger.success(f"Training completed: {arg.epochs} epochs on {DEVICE}.")
+
+    except Exception as error:
+        logger.exception(f"Training failed due to: {error}.")
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/linguify_yb/src/models/baseline_transformer.py b/linguify_yb/src/models/baseline_transformer.py
index 2ab43725..5fc151e2 100644
--- a/linguify_yb/src/models/baseline_transformer.py
+++ b/linguify_yb/src/models/baseline_transformer.py
@@ -18,19 +18,19 @@
 
 
 class TokenEmbedding(nn.Module):
-    """_summary_"""
+    """Embed the tokens with positional encoding."""
 
     def __init__(self, num_vocab, maxlen, embedding_dim):
         """_summary_
 
         Parameters
         ----------
-        num_vocab : _type_
-            _description_
-        maxlen : _type_
-            _description_
-        embedding_dim : _type_
-            _description_
+        num_vocab : int
+            size of the vocabulary
+        maxlen : int
+            maximum sequence length
+        embedding_dim : int
+            embedding output dimension
         """
         super().__init__()
         self.token_embed_layer = nn.Embedding(num_vocab, embedding_dim)
@@ -41,12 +41,12 @@ def forward(self, x):
 
         Parameters
         ----------
-        x : _type_
+        x : torch.Tensor
             _description_
 
         Returns
         -------
-        _type_
+        torch.Tensor
             _description_
         """
         maxlen = x.size(-1)
@@ -79,7 +79,7 @@ def __init__(self, embedding_dim):
         self.embedding_layer = nn.Linear(256,
embedding_dim) def forward(self, x): - # Input x should have shape (batch_size, input_size) + # Input x should have shape (batch_size, input_size, input_dim) x = x.unsqueeze(1) # Add a channel dimension for 1D convolution # Apply convolutional layers with ReLU activation and stride 2 diff --git a/linguify_yb/src/models/model_loader.py b/linguify_yb/src/models/model_loader.py index b09b7710..aabc97b7 100644 --- a/linguify_yb/src/models/model_loader.py +++ b/linguify_yb/src/models/model_loader.py @@ -1,16 +1,16 @@ """doc """ -import torch.nn as nn -from linguify_yb.src.models import baseline_transfomer, test_model + +from models.baseline_transformer import ASLTransformer + class ModelLoader: """Model Loader""" def __init__(self): self.models = { - "asl_transfomer": baseline_transfomer.build_model(), - "test_model": test_model.build_model(), + "asl_transfomer": ASLTransformer(), } def get_model(self, model_name): @@ -30,25 +30,4 @@ def get_model(self, model_name): if model_name in self.models: return self.models[model_name] else: - raise ValueError - - - -# For Debugging -class TestLinear(nn.Module): - def __init__( - self, - ) -> None: - super().__init__() - self.linear1 = nn.Linear(10, 100) - self.linear2 = nn.Linear(100, 10) - self.linear3 = nn.Linear(10, 2) - self.sequnn = nn.Sequential(self.linear1, self.linear2, self.linear3) - - def forward(self, input_x): - outs = self.linear1(input_x) - return outs - - -def build_model(): - return TestLinear() + raise ValueError("Model is not in the model list") diff --git a/linguify_yb/src/models/static_transfromer.py b/linguify_yb/src/models/static_transfromer.py new file mode 100644 index 00000000..e69de29b diff --git a/linguify_yb/src/tests/test_data_ingestion.py b/linguify_yb/src/tests/test_data_ingestion.py index e28f43ab..1a51fd08 100644 --- a/linguify_yb/src/tests/test_data_ingestion.py +++ b/linguify_yb/src/tests/test_data_ingestion.py @@ -7,6 +7,11 @@ from src.dataset.frames_config import FRAME_LEN from src.dataset.preprocess import clean_frames_process +# TODO test for frames in right shapes, in tensor, frames are normalize +# TODO test for frames dont contain NAN + +# TODO test for labels are tokensize + @pytest.mark.parametrize( "frames", @@ -16,3 +21,14 @@ def test_frames_preprocess(frames): clean_frames = clean_frames_process(frames) expected_output_shape = (128, 345) assert expected_output_shape == clean_frames.shape + +@pytest +def test_TokenHashTable(tokentable): + token_table = + sample_sentences = "" + sample_sentences_len = len(sample_sentences) + sample_sentences_token = [64,] + tokenize_result = token_table + assert sample_sentences_len == len(tokenize_result) + assert sample_sentences_token == tokenize_result + diff --git a/linguify_yb/src/trainer.py b/linguify_yb/src/trainer.py new file mode 100644 index 00000000..c9f191c3 --- /dev/null +++ b/linguify_yb/src/trainer.py @@ -0,0 +1,141 @@ +""" +doc + +# Usage: +# python -m src/train.py \ +# --epochs 10 \ +# --batch 512 \ +""" +# TODO Complete and refactor code for distributed training + +import os +import json + +import numpy as np +import torch +import wandb +from torch import nn + +from utils.logger_util import logger + + +def train(model, optim, loss_func, n_epochs, batch, device,): + + model.to(device) + + train_losses = [] + val_losses = [] + val_dataloader = # get_dataloader(TRAIN_FILES[0][0], TRAIN_FILES[0][1], batch_size=batch) + for epoch in range(n_epochs): + logger.info(f"Training on epoch {epoch}.") + total_epochs = epoch + file_train_loss = [] + for file, file_id 
in TRAIN_DS_FILES: + train_dataloader = # get_dataloader(file, file_id, batch_size=batch) + + # Performs training using mini-batches + train_loss = mini_batch( + model, train_dataloader, optim, loss_func, device, validation=False + ) + file_train_loss.append(train_loss) + train_loss = np.mean(file_train_loss) + train_losses.append(train_loss) + + # Performs evaluation using mini-batches + logger.info("Starting validation.") + with torch.no_grad(): + val_loss = mini_batch( + model, val_dataloader, optim, loss_func, device, validation=True + ) + val_losses.append(val_loss) + + wandb.log( + { + "train_loss": train_loss, + "val_loss": val_loss, + "epoch": epoch, + } + ) + + if epoch // 2 == 0: + logger.info("Initiating checkpoint. Saving model and optimizer states.") + save_checkpoint( + MODEL_DIR, model, optim, total_epochs, train_losses, val_losses + ) + + +def mini_batch( + model, dataloader, mini_batch_optim, loss_func, device, validation=False +): + # The mini-batch can be used with both loaders + # The argument `validation`defines which loader and + # corresponding step function is going to be used + if validation: + step_func = val_step_func(model, loss_func) + else: + step_func = train_step_func(model, mini_batch_optim, loss_func) + + # Once the data loader and step function, this is the same + # mini-batch loop we had before + mini_batch_losses = [] + for x_batch, y_batch in dataloader: + x_batch = x_batch.to(device) + y_batch = y_batch.to(device) + loss = step_func(x=x_batch, y=y_batch) + mini_batch_losses.append(loss) + loss = np.mean(mini_batch_losses) + return loss + + +def train_step_func(model, optim_, loss_func): + def perform_train_step_fn(x, y): + model.train() + preds = model(x) + loss = loss_func(preds, y) + loss.backward() + optim_.step() + optim_.zero_grad() + return loss.item() + + return perform_train_step_fn + + +def val_step_func(model, loss_func): + def perform_val_step_fn(x, y): + model.eval() + preds = model(x) + loss = loss_func(preds, y) + return loss.item() + + return perform_val_step_fn + + +def save_checkpoint(filename, model, optimizer, total_epochs, train_losses, val_losses): + # Builds dictionary with all elements for resuming training + checkpoint = { + "epoch": total_epochs, + "model_state_dict": model.state_dict(), + "optimizer_state_dict": optimizer.state_dict(), + "loss": train_losses, + "val_loss": val_losses, + } + + torch.save(checkpoint, filename) + + +def load_checkpoint(model, optimizer, filename): + # Loads dictionary + checkpoint = torch.load(filename) + + # Restore state for model and optimizer + model.load_state_dict(checkpoint["model_state_dict"]) + optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) + + total_epochs = checkpoint["epoch"] + losses = checkpoint["loss"] + val_losses = checkpoint["val_loss"] + return model + + +def distributed_stra_gpu(): + pass diff --git a/linguify_yb/src/utils/util.py b/linguify_yb/src/utils/util.py new file mode 100644 index 00000000..c4371b7b --- /dev/null +++ b/linguify_yb/src/utils/util.py @@ -0,0 +1,77 @@ +import os +import random +import argparse + +import numpy as np +import torch + +#import torch_xla.core.xla_model as xm + +def set_seed(seed: int = 42) -> None: + np.random.seed(seed) + random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + # When running on the CuDNN backend, two further options must be set + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + # Set a fixed value for the hash seed + os.environ["PYTHONHASHSEED"] = 
str(seed) + + +def get_device_strategy(tpu=False): + if tpu: + device = None #xm.xla_device() + else: + device = torch.device("cuda" if torch.cuda.is_availabe() else "cpu") + return device + + +def parse_args(): + """ + Parse arguments given to the script. + + Returns: + The parsed argument object. + """ + parser = argparse.ArgumentParser( + description="Run distributed data-parallel training and log with wandb." + ) + + parser.add_argument( + "--model", + default="asl_transfomer", + type=str, + metavar="N", + help="name of model to train", + ) + + parser.add_argument( + "--epochs", + default=2, + type=int, + metavar="N", + help="number of total epochs to run", + ) + parser.add_argument( + "--batch", + default=32, + type=int, + metavar="N", + help="number of data samples in one batch", + ) + parser.add_argument( + "--tpu", + default=False, + type=bool, + metavar="N", + help="Train on TPU Device", + ) + parser.add_argument( + "--resume_checkpoint", + type=bool, + help="Path to the checkpoint for resuming training", + ) + + args = parser.parse_args() + return args diff --git a/version.txt b/version.txt new file mode 100644 index 00000000..e69de29b From 0b5653758e506b61458f690be782aa636a598e4c Mon Sep 17 00:00:00 2001 From: rileydrizzy Date: Tue, 12 Dec 2023 23:37:08 +0100 Subject: [PATCH 16/16] [add] updates --- linguify_yb/development/dev.ipynb | 54 ++++++++++++++------ linguify_yb/src/dataset/dataset_loader.py | 6 +-- linguify_yb/src/tests/test_data_ingestion.py | 37 +++++++++----- linguify_yb/src/trainer.py | 4 -- 4 files changed, 66 insertions(+), 35 deletions(-) diff --git a/linguify_yb/development/dev.ipynb b/linguify_yb/development/dev.ipynb index 07faeb77..27c00cb4 100644 --- a/linguify_yb/development/dev.ipynb +++ b/linguify_yb/development/dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -38,33 +38,57 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "torch.is_distri" + "sample_sentence_token = [60,51,39,40,50,0,40,50,0,32,0,51,36,50,51,0,49,52,45,61]\n", + "# Padding the token\n", + "sample_sentence_token = sample_sentence_token + ([59] * (64 - len(sample_sentence_token)))\n", + "sample_sentence_token = torch.tensor(sample_sentence_token)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 20, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "torch.Size([1, 128, 345]) torch.Size([1, 64]) 1\n", - "torch.Size([2, 128, 345]) torch.Size([2, 64]) 2\n", - "torch.Size([4, 128, 345]) torch.Size([4, 64]) 4\n", - "torch.Size([8, 128, 345]) torch.Size([8, 64]) 8\n" - ] + "data": { + "text/plain": [ + "64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(sample_sentence_token)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([60, 51, 39, 40, 50, 0, 40, 50, 0, 32, 0, 51, 36, 50, 51, 0, 49, 52,\n", + " 45, 61, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,\n", + " 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,\n", + " 59, 59, 59, 59, 59, 59, 59, 59, 59, 59])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "for b in ans:\n", - " x, y , bs = b\n", - " print(x.shape, y.shape,bs)" + "sample_sentence_token" ] }, { diff 
--git a/linguify_yb/src/dataset/dataset_loader.py b/linguify_yb/src/dataset/dataset_loader.py
index 947531f8..afaa302f 100644
--- a/linguify_yb/src/dataset/dataset_loader.py
+++ b/linguify_yb/src/dataset/dataset_loader.py
@@ -42,11 +42,11 @@ def __init__(
     def _indexesfromsentence(self, sentence):
         return [self.word2index[word] for word in sentence]
 
-    def tensorfromsentence(self, sentence):
+    def sentence_to_tensor(self, sentence):
         indexes = self._indexesfromsentence(sentence)
-        return torch.tensor(indexes, dtype=torch.long)  # .view(1, -1)
+        return torch.tensor(indexes, dtype=torch.long)
 
-    def indexes_to_sentence(self, indexes_list):
+    def index_to_sentence(self, indexes_list):
         if torch.is_tensor(indexes_list):
             indexes_list = indexes_list.tolist()
         words = [self.index2word[idx] for idx in indexes_list]
diff --git a/linguify_yb/src/tests/test_data_ingestion.py b/linguify_yb/src/tests/test_data_ingestion.py
index 1a51fd08..aaa5eedf 100644
--- a/linguify_yb/src/tests/test_data_ingestion.py
+++ b/linguify_yb/src/tests/test_data_ingestion.py
@@ -6,6 +6,7 @@
 from torch.utils.data import DataLoader
 from src.dataset.frames_config import FRAME_LEN
 from src.dataset.preprocess import clean_frames_process
+from src.dataset.dataset_loader import TokenHashTable
 
 # TODO test for frames in right shapes, in tensor, frames are normalize
 # TODO test for frames dont contain NAN
@@ -18,17 +19,26 @@
     [torch.randn(num_frames, 345) for num_frames in [10, 108, 128, 156, 750, 420]],
 )
 def test_frames_preprocess(frames):
-    clean_frames = clean_frames_process(frames)
+    """doc"""
+    frames = clean_frames_process(frames)
     expected_output_shape = (128, 345)
-    assert expected_output_shape == clean_frames.shape
-
-@pytest
-def test_TokenHashTable(tokentable):
-    token_table = 
-    sample_sentences = ""
-    sample_sentences_len = len(sample_sentences)
-    sample_sentences_token = [64,]
-    tokenize_result = token_table
-    assert sample_sentences_len == len(tokenize_result)
-    assert sample_sentences_token == tokenize_result
-    
+    assert (
+        expected_output_shape == frames.shape
+    ), f"frames shape should be {expected_output_shape}"
+
+
+def test_token_hash_table():
+    token_table = TokenHashTable()
+    sample_sentence = "this is a test run"
+    sample_sentence_len = len(sample_sentence)
+    # Expected token ids for the plain sentence; sentence_to_tensor does not
+    # add the start/end tokens or padding, so neither does the expectation.
+    sample_sentence_token = torch.tensor(
+        [51, 39, 40, 50, 0, 40, 50, 0, 32, 0, 51, 36, 50, 51, 0, 49, 52, 45]
+    )
+    tokenize_result = token_table.sentence_to_tensor(sample_sentence)
+    assert sample_sentence_len == len(tokenize_result)
+    assert torch.equal(sample_sentence_token, tokenize_result)
+
+    # Check that the tokenizer output is a PyTorch tensor
+    assert torch.is_tensor(tokenize_result), "is not a PyTorch tensor"
diff --git a/linguify_yb/src/trainer.py b/linguify_yb/src/trainer.py
index c9f191c3..3634140c 100644
--- a/linguify_yb/src/trainer.py
+++ b/linguify_yb/src/trainer.py
@@ -135,7 +135,3 @@ def load_checkpoint(model, optimizer, filename):
     losses = checkpoint["loss"]
     val_losses = checkpoint["val_loss"]
     return model
-
-
-def distributed_stra_gpu():
-    pass
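
Note: the trainer above keeps the per-file dataloader calls as TODO placeholders, so the end-to-end wiring is easiest to see in a small, self-contained sketch. The code below mirrors the intended structure (one dataloader per parquet file as in main.py's TRAIN_DS_FILES, a shared mini-batch step for training and validation, a checkpoint every other epoch) and reuses the optimizer and loss settings from main.py. The ToyModel, the stand-in get_dataloader, the dummy parquet file names, and the plain print logging are illustrative assumptions only, not code from these patches.

"""Minimal sketch of the per-file training loop the trainer stubs point at.
Assumptions: toy model, fake (file, file_id) pairs, stand-in get_dataloader."""
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset


def get_dataloader(file, file_id, batch_size=32):
    # Stand-in for dataset_loader.get_dataloader: yields batches shaped like the
    # real pipeline, (batch, 128, 345) frames and (batch, 64) token ids.
    frames = torch.randn(8, 128, 345)
    phrases = torch.randint(0, 62, (8, 64))
    return DataLoader(TensorDataset(frames, phrases), batch_size=batch_size)


class ToyModel(nn.Module):
    # Tiny placeholder with the same input/output contract as the transformer:
    # (batch, 128, 345) frames in, (batch, 62, 64) per-position logits out.
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(345, 62)        # per-frame logits over 62 tokens
        self.pool = nn.AdaptiveAvgPool1d(64)  # squeeze 128 frames to 64 positions

    def forward(self, x):
        logits = self.proj(x)                      # (batch, 128, 62)
        return self.pool(logits.transpose(1, 2))   # (batch, 62, 64)


def mini_batch(model, loader, optim, loss_func, device, validation=False):
    # One pass over a dataloader; returns the mean loss for that file.
    losses = []
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        if validation:
            model.eval()
            with torch.no_grad():
                loss = loss_func(model(x), y)
        else:
            model.train()
            loss = loss_func(model(x), y)
            loss.backward()
            optim.step()
            optim.zero_grad()
        losses.append(loss.item())
    return float(np.mean(losses))


def train(model, optim, loss_func, n_epochs, batch, device, train_files, valid_files):
    model.to(device)
    val_loader = get_dataloader(*valid_files[0], batch_size=batch)
    for epoch in range(n_epochs):
        # One dataloader per parquet file, mirroring TRAIN_DS_FILES in main.py.
        train_loss = np.mean([
            mini_batch(model, get_dataloader(f, fid, batch_size=batch),
                       optim, loss_func, device)
            for f, fid in train_files
        ])
        val_loss = mini_batch(model, val_loader, optim, loss_func, device,
                              validation=True)
        if epoch % 2 == 0:  # checkpoint every other epoch
            torch.save({"epoch": epoch,
                        "model_state_dict": model.state_dict(),
                        "optimizer_state_dict": optim.state_dict()}, "model.pt")
        print(f"epoch {epoch}: train {train_loss:.4f} | val {val_loss:.4f}")


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ToyModel()
    train(model,
          torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9),
          nn.CrossEntropyLoss(label_smoothing=0.1),
          n_epochs=2, batch=4, device=device,
          train_files=[("dummy_a.parquet", 1), ("dummy_b.parquet", 2)],
          valid_files=[("dummy_c.parquet", 3)])

Swapping ToyModel for the ASLTransformer exposed by model_loader.py and the stand-in get_dataloader for dataset_loader.get_dataloader should reproduce the loop that main.py and trainer.py are building toward.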