Skip to content

Commit

Permalink
Added initial implementation for transformer
Browse files Browse the repository at this point in the history
  • Loading branch information
ENate committed Feb 7, 2024
1 parent 4a819e4 commit 2f25af9
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 2 deletions.
Empty file.
Empty file.
25 changes: 25 additions & 0 deletions transformers/from_scratch/src/sentence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import re
from typing import List, Optional


class Sentence:
    """Build a token <-> index vocabulary from a list of input sentences.

    Every instance starts with the three special tokens ``BOS``, ``EOS`` and
    ``PAD`` at indices 0, 1 and 2. Each sentence passed to the constructor is
    tokenized and every previously unseen token receives the next free index.

    Attributes:
        token_2_index: Mapping from token string to its integer index.
        index_to_token: Inverse mapping, from integer index to token string.
    """

    BOS = "BOS"  # Beginning of sentence
    EOS = "EOS"  # End of sentence
    PAD = "PAD"  # Padding marker; reserved in the vocabulary, never inserted here

    # A token is either a run of word characters or a run of punctuation
    # (anything that is neither whitespace nor a word character).
    _TOKEN_PATTERN = re.compile(r"\w+|[^\s\w]+")

    def __init__(self, sentence_list: Optional[List[str]] = None):
        """Create the vocabulary, optionally seeding it from ``sentence_list``.

        Args:
            sentence_list: Sentences to tokenize and register. ``None`` or an
                empty list leaves only the three special tokens registered.
        """
        self.token_2_index = {self.BOS: 0, self.EOS: 1, self.PAD: 2}
        self.index_to_token = {v: k for k, v in self.token_2_index.items()}
        # Nothing to register: keep the initial special-token-only state.
        if not sentence_list:
            return

        # Tokenize each sentence (wrapped in BOS/EOS) and register its tokens.
        for sentence in sentence_list:
            self.add_tokens(self.tokenize(sentence))

    def add_tokens(self, param: Optional[List[str]]) -> None:
        """Register every token in ``param`` that is not yet in the vocabulary.

        Args:
            param: Tokens to register. ``None`` or an empty list is a no-op.
                Tokens already present keep their existing index.
        """
        for token in param or ():
            if token not in self.token_2_index:
                # Indices are dense, so the next free index is the current size.
                index = len(self.token_2_index)
                self.token_2_index[token] = index
                self.index_to_token[index] = token

    def tokenize(self, sentence: str, add_special_tokens: bool = True) -> List[str]:
        """Split ``sentence`` into word and punctuation tokens.

        Args:
            sentence: Raw text to split.
            add_special_tokens: When true, wrap the result in ``BOS`` / ``EOS``.

        Returns:
            The list of tokens, in order of appearance.
        """
        tokens = self._TOKEN_PATTERN.findall(sentence)
        if add_special_tokens:
            tokens = [self.BOS, *tokens, self.EOS]
        return tokens

4 changes: 2 additions & 2 deletions transformers/src/vocabulary.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@


class Vocabulary:
"""Generates a list of tokens from imput string """
"""Generates a list of tokens from input string """
BOS = "BOS"
EOS = "EOS"
PAD = "PAD"

def __init__(self, list_of_sentences: Optional[List[str]]) -> None:
self.token2index = {self.BOS: 0, self.EOS: 1, self.PAD: 2}
self.index2token = {v: k for k, v in self.token2index.items()}
# check whethere there is a list of sentences?
# check whether there is a list of sentences?
if not list_of_sentences:
# then return
return
Expand Down

0 comments on commit 2f25af9

Please sign in to comment.