Skip to content

Commit 31cb9b7

Browse files
committed
Revert the previous two commits because I forgot to create a new branch :(
1 parent 80bf6e1 commit 31cb9b7

File tree

2 files changed: 3 additions and 5 deletions (+3 / -5 lines changed)

silnlp/nmt/config.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -493,9 +493,7 @@ def preprocess(self, stats: bool, force_align: bool = False) -> None:
493  493    LOGGER.error(f"The source file {str(file)} does not exist.")
494  494    return
495  495
496       - if self.data["tokenize"]:
497       - self._build_vocabs(stats)
498       -
     496  + self._build_vocabs(stats)
499  497    tokenizer = self.create_tokenizer()
500  498    self._build_corpora(tokenizer, stats, force_align)
501  499    LOGGER.info("Preprocessing completed")
@@ -559,7 +557,7 @@ def _build_corpora(self, tokenizer: Tokenizer, stats: bool, force_align: bool) -
559  557    dict_count = self._write_dictionary(tokenizer, src_terms_files, trg_terms_files)
560  558    LOGGER.info(f"dictionary size: {dict_count}")
561  559
562       - if stats and self.data["tokenize"]:
     560  + if stats:
563  561    self._calculate_tokenization_stats()
564  562
565  563    return train_count

silnlp/nmt/hugging_face_config.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -540,7 +540,7 @@ def _build_vocabs(self, stats: bool = False) -> None:
540  540    ["Target", 0],
541  541    ]
542  542
543       - if stats and self.data["tokenize"]:
     543  + if stats:
544  544    stats_columns = pd.MultiIndex.from_tuples(
545  545    [
546  546    (" ", "Translation Side"),

0 commit comments

Comments
 (0)