Skip to content

Commit 3c7a0a1

Browse files
committed
finish exercise.
1 parent 765f17b commit 3c7a0a1

File tree

2 files changed

+5
-15
lines changed

2 files changed

+5
-15
lines changed

src/attention_model.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,10 @@ def dot_product_attention(
2626
Returns:
2727
torch.Tensor: The attention values of shape [batch, heads, out_length, d_v]
2828
"""
29-
t, dk = q.shape[-2:]
30-
device = q.device
31-
32-
scaled = q @ k.transpose(-2, -1) / torch.sqrt(torch.tensor(dk))
33-
if is_causal:
34-
scaled = scaled + (-1) * torch.exp(
35-
(torch.tril(torch.ones(t, t).to(device)) - 0.5) * -2.0 * torch.inf
36-
)
37-
soft_scaled = f.softmax(scaled, dim=-1)
38-
attention_out = soft_scaled @ v
29+
# TODO implement multi head attention.
30+
# Use e.g. torch.transpose, torch.sqrt, torch.tril, torch.exp, torch.inf
31+
# as well as torch.nn.functional.softmax .
32+
attention_out = None
3933
return attention_out
4034

4135

src/util.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,5 @@ def convert(sequences: torch.Tensor, inv_vocab: dict) -> list:
9898
list: A list of characters.
9999
"""
100100
res = []
101-
for int_seq in sequences:
102-
char_seq = []
103-
for int_char in int_seq:
104-
char_seq.append(inv_vocab[int(int_char)])
105-
res.append(char_seq)
101+
# TODO: Return a nested list of characters.
106102
return res

0 commit comments

Comments
 (0)