From a4daed59630e4c7f17a49bc95934b0d31a33be7a Mon Sep 17 00:00:00 2001
From: vinayak <nayakvinayak95@gmail.com>
Date: Sat, 13 Jul 2024 19:52:44 +0530
Subject: [PATCH 1/4] Changed type hinting in rope_test.py

---
 rope_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/rope_test.py b/rope_test.py
index 24dc031..07af55c 100644
--- a/rope_test.py
+++ b/rope_test.py
@@ -2,6 +2,7 @@
 import numpy as np
 
 from rope import apply_rotary_emb
+from typing import Tuple
 
 seed = 0
 
@@ -17,7 +18,7 @@ def construct_key() -> torch.Tensor:
     '''
     return 3 * torch.ones([1, 2, 2, 4])
 
-def test_apply_rotary_emb() -> tuple[torch.Tensor, torch.Tensor]:
+def test_apply_rotary_emb() -> Tuple[torch.Tensor, torch.Tensor]:
     rng = np.random.default_rng(seed)
     torch.manual_seed(seed)
     model = torch.nn.Linear(3, 2, bias=False)

From f4b80af13ca980c636ac00f188763d55936b15eb Mon Sep 17 00:00:00 2001
From: vinayak <nayakvinayak95@gmail.com>
Date: Sat, 13 Jul 2024 20:03:24 +0530
Subject: [PATCH 2/4] Changed type hinting in classifier.py

---
 classifier.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/classifier.py b/classifier.py
index d660aed..67751db 100644
--- a/classifier.py
+++ b/classifier.py
@@ -6,9 +6,10 @@
 from config import LlamaConfig
 from llama import load_pretrained
 from tokenizer import Tokenizer
+from typing import List
 
 class LlamaZeroShotClassifier(torch.nn.Module):
-	def __init__(self, config: LlamaConfig, tokenizer: Tokenizer, label_names: list[str]):
+	def __init__(self, config: LlamaConfig, tokenizer: Tokenizer, label_names: List[str]):
 		super(LlamaZeroShotClassifier, self).__init__()
 		self.num_labels = config.num_labels
 		self.llama = load_pretrained(config.pretrained_model_path)

From ce090dcd1e96ef3450a2eba838d2240c3cd9a92c Mon Sep 17 00:00:00 2001
From: vinayak <nayakvinayak95@gmail.com>
Date: Sat, 13 Jul 2024 20:04:21 +0530
Subject: [PATCH 3/4] Rectified ordering in docstring of
 llama.LlamaLayer.forward

---
 llama.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llama.py b/llama.py
index f101fef..39f75a6 100644
--- a/llama.py
+++ b/llama.py
@@ -191,9 +191,9 @@ def forward(self, x):
         1) layer normalization of the input (via Root Mean Square layer normalization)
         2) self-attention on the layer-normalized input
         3) a residual connection (i.e., add the input to the output of the self-attention)
-        3) layer normalization on the output of the self-attention
-        4) a feed-forward network on the layer-normalized output of the self-attention
-        5) add a residual connection from the unnormalized self-attention output to the
+        4) layer normalization on the output of the self-attention
+        5) a feed-forward network on the layer-normalized output of the self-attention
+        6) add a residual connection from the unnormalized self-attention output to the
            output of the feed-forward network
         '''
         # todo

From d11ef1e69dc5d787d8c93243a7e3b86c6224d5fc Mon Sep 17 00:00:00 2001
From: vinayak <nayakvinayak95@gmail.com>
Date: Sat, 13 Jul 2024 20:04:57 +0530
Subject: [PATCH 4/4] Rectified  section in structure.md

---
 structure.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/structure.md b/structure.md
index 8d126c5..1ee78c7 100644
--- a/structure.md
+++ b/structure.md
@@ -37,9 +37,9 @@ The desired outputs are
 
 ### To be implemented
 Components that require your implementations are comment with ```#todo```. The detailed instructions can be found in their corresponding code blocks
-* ```llama.Attention.forward```
+* ```llama.Attention.compute_query_key_value_scores```
 * ```llama.RMSNorm.norm```
-* ```llama.Llama.forward```
+* ```llama.LlamaLayer.forward```
 * ```llama.Llama.generate```
 * ```rope.apply_rotary_emb``` (this one may be tricky! you can use `rope_test.py` to test your implementation)
 * ```optimizer.AdamW.step```