From 70dfa5d0c4f43beec508cfd24fb1351f3c4e2727 Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Thu, 13 Jul 2023 18:19:23 -0500 Subject: [PATCH 01/16] :recycle: Refactor trainer logic and move it to resources Signed-off-by: gkumbhat --- .../modules/text_generation/fine_tuning.py | 67 ++++++++----------- caikit_nlp/resources/pretrained_model/base.py | 37 +++++++++- .../pretrained_model/hf_auto_seq2seq_lm.py | 44 +++++++++++- 3 files changed, 105 insertions(+), 43 deletions(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index 05b1c075..ca726fa0 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -15,14 +15,7 @@ # Third Party from torch.utils.data import IterableDataset -from transformers import ( - AutoConfig, - AutoTokenizer, - DataCollatorForSeq2Seq, - Seq2SeqTrainer, - Seq2SeqTrainingArguments, - Trainer, -) +from transformers import AutoConfig, AutoTokenizer, Trainer # First Party from caikit.core.data_model import DataStream @@ -32,6 +25,7 @@ # Local from ...data_model import GeneratedResult, GenerationTrainRecord +from ...resources.pretrained_model.base import PretrainedModelBase from ...toolkit.data_stream_wrapper import SimpleIterableStreamWrapper from ...toolkit.data_type_utils import get_torch_dtype from .text_generation_task import TextGenerationTask @@ -79,6 +73,7 @@ def train( lr: float = 2e-5, # Directory where model predictions and checkpoints will be written checkpoint_dir: str = "/tmp", + **training_arguments ): """ # FIXME: Below is currently configured for Seq2Seq only @@ -110,6 +105,7 @@ def train( log.debug("Bootstrapping base resource [%s]", base_model) base_model = resource_type.bootstrap(base_model, torch_dtype=torch_dtype) + error.type_check("", PretrainedModelBase, base_model=base_model) ## Generate data loader from stream training_dataset: IterableDataset = cls._preprocess_function( train_stream=train_stream, @@ -125,40 +121,33 @@ def train( # by optionally accepting `training_args` # as argument to this train function. 
# TODO: Remove all the default used below and make them all configurable - training_args = Seq2SeqTrainingArguments( - output_dir=checkpoint_dir, - per_device_train_batch_size=batch_size, - per_device_eval_batch_size=batch_size, - num_train_epochs=num_epochs, + + training_args = { + "output_dir": checkpoint_dir, + "per_device_train_batch_size": batch_size, + "per_device_eval_batch_size": batch_size, + "num_train_epochs": num_epochs, # NOTE: We have disabled evaluation for now - do_eval=False, - # evaluation_strategy = "epoch", - learning_rate=lr, - weight_decay=0.01, - save_total_limit=3, - predict_with_generate=True, - fp16=True, - push_to_hub=False, - no_cuda=False, # Default - generation_max_length=max_target_length, - remove_unused_columns=False, - dataloader_pin_memory=False, - gradient_accumulation_steps=accumulate_steps, - eval_accumulation_steps=accumulate_steps, + "do_eval": False, + "# evaluation_strategy ": "epoch", + "learning_rate": lr, + "weight_decay": 0.01, + "save_total_limit": 3, + "predict_with_generate": True, + "fp16": True, + "push_to_hub": False, + "no_cuda": False, # Default + "generation_max_length": max_target_length, + "remove_unused_columns": False, + "dataloader_pin_memory": False, + "gradient_accumulation_steps": accumulate_steps, + "eval_accumulation_steps": accumulate_steps, # eval_steps=1, - ) + **training_arguments, + } - data_collator = DataCollatorForSeq2Seq( - tokenizer=base_model.tokenizer, model=base_model.model - ) - - trainer = Seq2SeqTrainer( - base_model.model, - training_args, - train_dataset=training_dataset, - data_collator=data_collator, - tokenizer=base_model.tokenizer, - # compute_metrics=compute_metrics, + trainer = base_model.get_trainer( + train_dataset=training_dataset, **training_args ) # Start training via Trainer.train function diff --git a/caikit_nlp/resources/pretrained_model/base.py b/caikit_nlp/resources/pretrained_model/base.py index 079a6f83..3c9e26a7 100644 --- a/caikit_nlp/resources/pretrained_model/base.py +++ b/caikit_nlp/resources/pretrained_model/base.py @@ -14,12 +14,13 @@ # Standard from abc import ABC, abstractmethod -from typing import List, Optional, Type +from typing import List, Optional, Type, Union import json import os # Third Party -from transformers import AutoTokenizer +from torch.utils.data import IterableDataset +from transformers import AutoTokenizer, DataCollator, Trainer, TrainingArguments from transformers.models.auto.auto_factory import _BaseAutoModelClass import torch @@ -233,6 +234,38 @@ def save( self.tokenizer.save_pretrained(tok_abs_path) self.model.save_pretrained(model_abs_path) + def get_trainer( + self, + train_dataset: IterableDataset, + eval_dataset: Union[IterableDataset, None] = None, + optimizers=(None, None), + **kwargs, + ): + """ + NOTE: following parameters are not supported currently: + 1. model_init + 2. compute_metrics + 3. callbacks + 4. preprocess_logits_for_metrics + """ + + training_args = TrainingArguments(**kwargs) + + # TODO: Fetch DataCollator either from property of this + # class or fetch it as an argument. 
+ data_collator = DataCollator(tokenizer=self._tokenizer, model=self._model) + + # pylint: disable=duplicate-code + trainer_arguments = { + "train_dataset": train_dataset, + "data_collator": data_collator, + "tokenizer": self._tokenizer, + "optimizers": optimizers, + "eval_dataset": eval_dataset, + } + + return Trainer(self._model, training_args, **trainer_arguments) + # pylint: disable=unused-argument @classmethod def get_num_transformers_submodules( diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py index d0627708..1a06cd9b 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py @@ -15,10 +15,16 @@ Huggingface auto causal LM resource type """ # Standard -from typing import List +from typing import List, Union # Third Party -from transformers import AutoModelForSeq2SeqLM +from torch.utils.data import IterableDataset +from transformers import ( + AutoModelForSeq2SeqLM, + DataCollatorForSeq2Seq, + Seq2SeqTrainer, + Seq2SeqTrainingArguments, +) from transformers.models.auto import modeling_auto # First Party @@ -64,3 +70,37 @@ def get_num_transformers_submodules( "", 0 < num_transformer_submodules <= cls.MAX_NUM_TRANSFORMERS ) return num_transformer_submodules + + def get_trainer( + self, + train_dataset: IterableDataset, + eval_dataset: Union[IterableDataset, None] = None, + optimizers=(None, None), + **kwargs + ): + """ + NOTE: following parameters are not supported currently: + 1. model_init + 2. compute_metrics + 3. callbacks + 4. preprocess_logits_for_metrics + """ + + training_args = Seq2SeqTrainingArguments(**kwargs) + + # TODO: Fetch DataCollator either from property of this + # class or fetch it as an argument. 
+ data_collator = DataCollatorForSeq2Seq( + tokenizer=self._tokenizer, model=self._model + ) + + # pylint: disable=duplicate-code + trainer_arguments = { + "train_dataset": train_dataset, + "data_collator": data_collator, + "tokenizer": self._tokenizer, + "optimizers": optimizers, + "eval_dataset": eval_dataset, + } + + return Seq2SeqTrainer(self._model, training_args, **trainer_arguments) From e9d21ffda5700e095e4db83b47f755e28a1e4b70 Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Sun, 16 Jul 2023 16:52:10 -0500 Subject: [PATCH 02/16] :construction: Work in progress causal-lm trainer Signed-off-by: gkumbhat --- .../modules/text_generation/fine_tuning.py | 4 +- caikit_nlp/resources/pretrained_model/base.py | 32 +++++++++++--- .../pretrained_model/hf_auto_seq2seq_lm.py | 4 +- .../text_generation/test_fine_tuning.py | 43 +++++++++++++++++-- 4 files changed, 70 insertions(+), 13 deletions(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index ca726fa0..18951bfa 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -129,15 +129,13 @@ def train( "num_train_epochs": num_epochs, # NOTE: We have disabled evaluation for now "do_eval": False, - "# evaluation_strategy ": "epoch", + # "evaluation_strategy ": "epoch", "learning_rate": lr, "weight_decay": 0.01, "save_total_limit": 3, - "predict_with_generate": True, "fp16": True, "push_to_hub": False, "no_cuda": False, # Default - "generation_max_length": max_target_length, "remove_unused_columns": False, "dataloader_pin_memory": False, "gradient_accumulation_steps": accumulate_steps, diff --git a/caikit_nlp/resources/pretrained_model/base.py b/caikit_nlp/resources/pretrained_model/base.py index 3c9e26a7..f7e231f5 100644 --- a/caikit_nlp/resources/pretrained_model/base.py +++ b/caikit_nlp/resources/pretrained_model/base.py @@ -20,7 +20,7 @@ # Third Party from torch.utils.data import IterableDataset -from transformers import AutoTokenizer, DataCollator, Trainer, TrainingArguments +from transformers import AutoTokenizer, DataCollatorWithPadding, Trainer, TrainingArguments from transformers.models.auto.auto_factory import _BaseAutoModelClass import torch @@ -251,11 +251,8 @@ def get_trainer( training_args = TrainingArguments(**kwargs) - # TODO: Fetch DataCollator either from property of this - # class or fetch it as an argument. - data_collator = DataCollator(tokenizer=self._tokenizer, model=self._model) + data_collator = self._get_data_collator(**kwargs) - # pylint: disable=duplicate-code trainer_arguments = { "train_dataset": train_dataset, "data_collator": data_collator, @@ -266,6 +263,31 @@ def get_trainer( return Trainer(self._model, training_args, **trainer_arguments) + + def _get_data_collator(self, **kwargs): + """Function to return appropriate data collator based on resource. + + The default implementation of the base resource uses + DataCollatorWithPadding which will dynamically pad the inputs received. + + Args: + **kwargs: + All the keyword arguments passed to this function + will get filtered out to appropriate ones that are + applicable to implemented data collator. 
+ Returns: + transformers.DataCollator + """ + + applicable_args = ["max_length", "pad_to_multiple_of"] + collator_kwargs = {key: kwargs[key] for key in applicable_args if key in kwargs} + + return DataCollatorWithPadding( + tokenizer=self._tokenizer, + padding=True, + **collator_kwargs + ) + # pylint: disable=unused-argument @classmethod def get_num_transformers_submodules( diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py index 1a06cd9b..bf61dfcf 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py @@ -94,13 +94,15 @@ def get_trainer( tokenizer=self._tokenizer, model=self._model ) - # pylint: disable=duplicate-code trainer_arguments = { "train_dataset": train_dataset, "data_collator": data_collator, "tokenizer": self._tokenizer, "optimizers": optimizers, "eval_dataset": eval_dataset, + # Following only applicable for seq2seq + "predict_with_generate": True, + # "generation_max_length": max_target_length, } return Seq2SeqTrainer(self._model, training_args, **trainer_arguments) diff --git a/tests/modules/text_generation/test_fine_tuning.py b/tests/modules/text_generation/test_fine_tuning.py index e2491851..f82c79c3 100644 --- a/tests/modules/text_generation/test_fine_tuning.py +++ b/tests/modules/text_generation/test_fine_tuning.py @@ -9,8 +9,8 @@ # Local from caikit_nlp.data_model import GeneratedResult, GenerationTrainRecord from caikit_nlp.modules.text_generation import FineTuning -from caikit_nlp.resources.pretrained_model import HFAutoSeq2SeqLM -from tests.fixtures import SEQ2SEQ_LM_MODEL, disable_wip +from caikit_nlp.resources.pretrained_model import HFAutoCausalLM, HFAutoSeq2SeqLM +from tests.fixtures import CAUSAL_LM_MODEL, SEQ2SEQ_LM_MODEL, disable_wip @pytest.mark.skip( @@ -20,8 +20,9 @@ # run function """ ) -def test_train_model(disable_wip): - """Ensure that we can train a model on some toy data for 1+ steps & run inference.""" +def test_train_model_seq2seq(disable_wip): + """Ensure that we can finetune a seq2seq model on some toy data for 1+ + steps & run inference.""" train_kwargs = { "base_model": HFAutoSeq2SeqLM.bootstrap( model_name=SEQ2SEQ_LM_MODEL, tokenizer_name=SEQ2SEQ_LM_MODEL @@ -44,3 +45,37 @@ def test_train_model(disable_wip): # Ensure that we can get something out of it pred = model.run("@bar what a cute cat!") assert isinstance(pred, GeneratedResult) + + +# @pytest.mark.skip( +# """ +# We are skipping this test because we are waiting for new release +# of transformers library that includes bugfix that is currently breaking +# # run function +# """ +# ) +def test_train_model_causallm(disable_wip): + """Ensure that we can finetune a causal-lm model on some toy data for 1+ + steps & run inference.""" + train_kwargs = { + "base_model": HFAutoCausalLM.bootstrap( + model_name=CAUSAL_LM_MODEL, tokenizer_name=CAUSAL_LM_MODEL + ), + "num_epochs": 1, + "train_stream": caikit.core.data_model.DataStream.from_iterable( + [ + GenerationTrainRecord( + input="@foo what a cute dog!", output="no complaint" + ), + GenerationTrainRecord( + input="@bar this is the worst idea ever.", output="complaint" + ), + ] + ), + "torch_dtype": torch.float32, + } + model = FineTuning.train(**train_kwargs) + assert isinstance(model.model, Trainer) + # Ensure that we can get something out of it + pred = model.run("@bar what a cute cat!") + assert isinstance(pred, GeneratedResult) \ No newline at end of file From 
e067c6dd9ce06ea405adb1fd0052ab6c4536eb55 Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Tue, 18 Jul 2023 15:13:27 -0500 Subject: [PATCH 03/16] :construction: Implement seq2seq collator in resources Signed-off-by: gkumbhat --- .../pretrained_model/hf_auto_seq2seq_lm.py | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py index bf61dfcf..4ced36d9 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py @@ -90,9 +90,7 @@ def get_trainer( # TODO: Fetch DataCollator either from property of this # class or fetch it as an argument. - data_collator = DataCollatorForSeq2Seq( - tokenizer=self._tokenizer, model=self._model - ) + data_collator = self._get_data_collator(**kwargs) trainer_arguments = { "train_dataset": train_dataset, @@ -106,3 +104,27 @@ def get_trainer( } return Seq2SeqTrainer(self._model, training_args, **trainer_arguments) + + + def _get_data_collator(self, **kwargs): + """Function to return appropriate data collator based on resource. + + This implementation uses DataCollatorForSeq2Seq + + Args: + **kwargs: + All the keyword arguments passed to this function + will get filtered out to appropriate ones that are + applicable to implemented data collator. + Returns: + transformers.DataCollator + """ + + applicable_args = ["max_length", "pad_to_multiple_of"] + collator_kwargs = {key: kwargs[key] for key in applicable_args if key in kwargs} + + return DataCollatorForSeq2Seq( + tokenizer=self._tokenizer, + model=self._model, + **collator_kwargs + ) \ No newline at end of file From f539380121f6eac5c6c228eb41beb7beb6cc86e1 Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Sat, 29 Jul 2023 12:17:49 -0500 Subject: [PATCH 04/16] :art: Fix linting and formatting Signed-off-by: gkumbhat --- caikit_nlp/modules/text_generation/fine_tuning.py | 7 ++----- caikit_nlp/resources/pretrained_model/base.py | 12 +++++++----- .../resources/pretrained_model/hf_auto_seq2seq_lm.py | 7 +++---- tests/modules/text_generation/test_fine_tuning.py | 3 ++- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index 91eef1ef..16cfda15 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -18,9 +18,6 @@ from transformers import ( AutoConfig, AutoTokenizer, - DataCollatorForSeq2Seq, - Seq2SeqTrainer, - Seq2SeqTrainingArguments, Trainer, ) import torch @@ -34,7 +31,7 @@ import alog # Local -from ...data_model import GeneratedTextResult, GenerationTrainRecord +from ...data_model import GenerationTrainRecord from ...resources.pretrained_model.base import PretrainedModelBase from ...toolkit.data_stream_wrapper import SimpleIterableStreamWrapper from ...toolkit.data_type_utils import get_torch_dtype @@ -82,7 +79,7 @@ def train( lr: float = 2e-5, # Directory where model predictions and checkpoints will be written checkpoint_dir: str = "/tmp", - **training_arguments + **training_arguments, ): """ # FIXME: Below is currently configured for Seq2Seq only diff --git a/caikit_nlp/resources/pretrained_model/base.py b/caikit_nlp/resources/pretrained_model/base.py index 13bc451b..6e9ea641 100644 --- a/caikit_nlp/resources/pretrained_model/base.py +++ b/caikit_nlp/resources/pretrained_model/base.py @@ -20,7 +20,12 @@ # Third Party 
from torch.utils.data import IterableDataset -from transformers import AutoTokenizer, DataCollatorWithPadding, Trainer, TrainingArguments +from transformers import ( + AutoTokenizer, + DataCollatorWithPadding, + Trainer, + TrainingArguments, +) from transformers.models.auto.auto_factory import _BaseAutoModelClass import torch @@ -263,7 +268,6 @@ def get_trainer( return Trainer(self._model, training_args, **trainer_arguments) - def _get_data_collator(self, **kwargs): """Function to return appropriate data collator based on resource. @@ -283,9 +287,7 @@ def _get_data_collator(self, **kwargs): collator_kwargs = {key: kwargs[key] for key in applicable_args if key in kwargs} return DataCollatorWithPadding( - tokenizer=self._tokenizer, - padding=True, - **collator_kwargs + tokenizer=self._tokenizer, padding=True, **collator_kwargs ) # pylint: disable=unused-argument diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py index aed29bb1..df43ea2d 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py @@ -91,6 +91,7 @@ def get_trainer( training_args = Seq2SeqTrainingArguments(**kwargs) + # pylint: disable=duplicate-code # TODO: Fetch DataCollator either from property of this # class or fetch it as an argument. data_collator = self._get_data_collator(**kwargs) @@ -108,7 +109,6 @@ def get_trainer( return Seq2SeqTrainer(self._model, training_args, **trainer_arguments) - def _get_data_collator(self, **kwargs): """Function to return appropriate data collator based on resource. @@ -127,10 +127,9 @@ def _get_data_collator(self, **kwargs): collator_kwargs = {key: kwargs[key] for key in applicable_args if key in kwargs} return DataCollatorForSeq2Seq( - tokenizer=self._tokenizer, - model=self._model, - **collator_kwargs + tokenizer=self._tokenizer, model=self._model, **collator_kwargs ) + @staticmethod def build_task_tokenize_function( tokenizer: "AutoTokenizer", diff --git a/tests/modules/text_generation/test_fine_tuning.py b/tests/modules/text_generation/test_fine_tuning.py index 9e54206d..64974425 100644 --- a/tests/modules/text_generation/test_fine_tuning.py +++ b/tests/modules/text_generation/test_fine_tuning.py @@ -64,6 +64,7 @@ def test_train_model_causallm(disable_wip): pred = model.run("@bar what a cute cat!") assert isinstance(pred, GeneratedTextResult) + ############################## Error Cases ################################ @@ -84,4 +85,4 @@ def test_zero_epoch_case(disable_wip): "torch_dtype": torch.float32, } model = FineTuning.train(**train_kwargs) - assert isinstance(model.model, Trainer) \ No newline at end of file + assert isinstance(model.model, Trainer) From e1c8f38ca0f56d2fd2176c791eec48ad39eabd76 Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Sat, 29 Jul 2023 12:26:23 -0500 Subject: [PATCH 05/16] :bug: Fix seq2seq training arguments Signed-off-by: gkumbhat --- .../resources/pretrained_model/hf_auto_seq2seq_lm.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py index df43ea2d..cd6843ff 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py @@ -89,7 +89,10 @@ def get_trainer( 4. 
preprocess_logits_for_metrics """ - training_args = Seq2SeqTrainingArguments(**kwargs) + training_args = Seq2SeqTrainingArguments( + predict_with_generate=True, + **kwargs + ) # pylint: disable=duplicate-code # TODO: Fetch DataCollator either from property of this @@ -102,8 +105,6 @@ def get_trainer( "tokenizer": self._tokenizer, "optimizers": optimizers, "eval_dataset": eval_dataset, - # Following only applicable for seq2seq - "predict_with_generate": True, # "generation_max_length": max_target_length, } From 1724b603019d7b080c0ff68182f8b3308300b373 Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Sat, 29 Jul 2023 16:06:58 -0500 Subject: [PATCH 06/16] :bug: Remove task ids from resource tokenization functions Signed-off-by: gkumbhat --- .../modules/text_generation/fine_tuning.py | 42 ++++++------------- .../pretrained_model/hf_auto_causal_lm.py | 3 +- .../pretrained_model/hf_auto_seq2seq_lm.py | 8 ++-- 3 files changed, 18 insertions(+), 35 deletions(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index 16cfda15..8183c89e 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -15,11 +15,7 @@ # Third Party from torch.utils.data import IterableDataset -from transformers import ( - AutoConfig, - AutoTokenizer, - Trainer, -) +from transformers import AutoConfig, AutoTokenizer, Trainer import torch # First Party @@ -114,6 +110,7 @@ def train( error.type_check("", PretrainedModelBase, base_model=base_model) ## Generate data loader from stream training_dataset: IterableDataset = cls._preprocess_function( + base_model=base_model, train_stream=train_stream, tokenizer=base_model.tokenizer, max_source_length=max_source_length, @@ -259,6 +256,7 @@ def run( @staticmethod def _preprocess_function( + base_model: PretrainedModelBase, train_stream: DataStream[GenerationTrainRecord], tokenizer: AutoTokenizer, max_source_length: int, @@ -267,28 +265,14 @@ def _preprocess_function( ): """Pre-process each example to get it prepared for training.""" - # FIXME: Below is currently configured for Seq2Seq only - - def _tokenization_func( - example: GenerationTrainRecord, - ): - model_inputs = tokenizer( - example.input, - max_length=max_source_length, - truncation=True, - ) - - labels = tokenizer( - example.output, - max_length=max_target_length, - padding="max_length", - truncation=True, - ) - - model_inputs["labels"] = labels["input_ids"] - - return model_inputs - - return SimpleIterableStreamWrapper( - train_stream.map(_tokenization_func), shuffle=shuffle + ( + tokenize_function, + requires_unwrapping, + ) = base_model.build_task_tokenize_function( + tokenizer, max_source_length, max_target_length, verbalizer="" ) + mapped_stream = train_stream.map(tokenize_function) + if requires_unwrapping: + mapped_stream = mapped_stream.flatten() + + return SimpleIterableStreamWrapper(mapped_stream, shuffle=shuffle) diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py index b98a2983..cefe7838 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py @@ -104,7 +104,8 @@ def tokenize_function_language_model( # Here, we need to yield and manipulate the attention mask to attend # to the input seq + the tokens we have seen so far... 
num_target_samples = len(target_ids.input_ids) - source_ids["task_ids"] = 0 + # TODO: Why do we need task ids here?? + # source_ids["task_ids"] = 0 def generator_func(): for idx in range(num_target_samples): diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py index cd6843ff..6951f5fb 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py @@ -89,10 +89,7 @@ def get_trainer( 4. preprocess_logits_for_metrics """ - training_args = Seq2SeqTrainingArguments( - predict_with_generate=True, - **kwargs - ) + training_args = Seq2SeqTrainingArguments(predict_with_generate=True, **kwargs) # pylint: disable=duplicate-code # TODO: Fetch DataCollator either from property of this @@ -197,7 +194,8 @@ def tokenize_function_seq2seq( map(lambda x: IGNORE_ID if x == tokenizer.pad_token_id else x, labels) ) model_inputs["labels"] = labels - model_inputs["task_ids"] = 0 + # TODO: Why do we need task ids here?? + # model_inputs["task_ids"] = 0 return model_inputs return (tokenize_function_seq2seq, False) From 9a8f877b285b7ddc81e6544d673a67bd32bf27e4 Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Sun, 30 Jul 2023 14:18:37 -0500 Subject: [PATCH 07/16] :white_check_mark: Add cuda device fixture to get around cuda unit testing when available Signed-off-by: gkumbhat --- .../modules/text_generation/fine_tuning.py | 1 - .../text_generation/peft_prompt_tuning.py | 26 ++++++++++++------- caikit_nlp/resources/pretrained_model/base.py | 5 ++++ .../pretrained_model/hf_auto_causal_lm.py | 12 ++++++--- .../pretrained_model/hf_auto_seq2seq_lm.py | 10 +++++-- tests/fixtures/__init__.py | 13 ++++++++++ .../text_generation/test_fine_tuning.py | 6 ++--- .../test_peft_prompt_tuning.py | 8 +++--- tests/resources/test_pretrained_model.py | 3 +++ 9 files changed, 63 insertions(+), 21 deletions(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index 8183c89e..38e907df 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -153,7 +153,6 @@ def train( "learning_rate": lr, "weight_decay": 0.01, "save_total_limit": 3, - "fp16": True, "push_to_hub": False, "no_cuda": False, # Default "remove_unused_columns": False, diff --git a/caikit_nlp/modules/text_generation/peft_prompt_tuning.py b/caikit_nlp/modules/text_generation/peft_prompt_tuning.py index 6b4e7e7f..60c6efcc 100644 --- a/caikit_nlp/modules/text_generation/peft_prompt_tuning.py +++ b/caikit_nlp/modules/text_generation/peft_prompt_tuning.py @@ -173,7 +173,6 @@ def __del__(self): def run( self, text: str, - device: Optional[Union[str, int]] = _DETECT_DEVICE, max_new_tokens=20, min_new_tokens=0, ) -> GeneratedTextResult: @@ -182,8 +181,6 @@ def run( Args: text: str Input string to be used to the generation model. - device: Optional[Union[str, int]] - Device on which we should run inference; by default, we use the detected device. max_new_tokens: int The maximum numbers of tokens to generate. 
Default: 20 @@ -199,8 +196,8 @@ def run( verbalized_text = render_verbalizer(self.verbalizer, {"input": text}) # Apply the tokenizer to the sample text & move to correct device tok_tensors = self.tokenizer(verbalized_text, return_tensors="pt") - device = PeftPromptTuning._get_device(device) - inputs = {k: v.to(device) for k, v in tok_tensors.items()} + + inputs = {k: v.to(self.model.device) for k, v in tok_tensors.items()} with torch.no_grad(): # Run tokenized tensors through the rest of the PEFT model outputs = self.model.generate( @@ -604,7 +601,12 @@ def save(self, model_path: str, save_base_model: bool = False): module_saver.update_config(config_options) @classmethod - def load(cls, model_path: str, torch_dtype: str = None) -> "PeftPromptTuning": + def load( + cls, + model_path: str, + torch_dtype: str = None, + device: str = _DETECT_DEVICE, # TODO: Union[int, str] + ) -> "PeftPromptTuning": """Load a PEFT prompt tuning model. This method will currently fail if the original model was not saved with the arg value save_base_model=True. @@ -626,7 +628,7 @@ def load(cls, model_path: str, torch_dtype: str = None) -> "PeftPromptTuning": torch_dtype = str_to_torch_dtype(config.trained_torch_dtype) if config.has_base_model: # TODO: Implement logic for resource loading - device = cls._get_device(cls._DETECT_DEVICE) + device = cls._get_device(device) model_config = os.path.join(model_path, config.full_model_path) peft_config = PeftConfig.from_pretrained(model_config) if peft_config.task_type == "CAUSAL_LM": @@ -1005,7 +1007,7 @@ def _get_data_loaders_from_stream( tokenize_function, requires_unwrapping, ) = base_model.build_task_tokenize_function( - tokenizer, max_source_length, max_target_length, verbalizer + tokenizer, max_source_length, max_target_length, verbalizer, task_ids=0 ) mapped_stream = train_stream.map(tokenize_function) if requires_unwrapping: @@ -1066,7 +1068,13 @@ def _execute_train_loop( num_training_steps=(len(train_dataloader) * num_epochs), ) # Configure accelerator for gradient accumulation - accelerator = Accelerator(gradient_accumulation_steps=accumulate_steps) + accelerator_args = { + "gradient_accumulation_steps": accumulate_steps, + "device_placement": True + } + + accelerator = Accelerator(**accelerator_args) + for epoch in range(num_epochs): model.train() total_loss = 0 diff --git a/caikit_nlp/resources/pretrained_model/base.py b/caikit_nlp/resources/pretrained_model/base.py index 6e9ea641..e4a732b2 100644 --- a/caikit_nlp/resources/pretrained_model/base.py +++ b/caikit_nlp/resources/pretrained_model/base.py @@ -306,6 +306,7 @@ def build_task_tokenize_function( max_source_length: int, max_target_length: int, verbalizer: str, + task_ids: Union[None, int] = None, ) -> Tuple[Callable, bool]: """Builds tokenizer functions which can be mapped over train streams to process data which can then be easily passed to a DataLoader for different model types. @@ -320,6 +321,10 @@ def build_task_tokenize_function( verbalizer: str Verbalizer template to be used for formatting data. This template may use brackets to indicate where fields from the data model TrainGenerationRecord must be rendered. + task_ids: Union[None, int] + Task id corresponding particular task for multi-task prompt tuning. 
+ NOTE: Only required for MPT (Multi-task prompt tuning) + Default: None Returns: Tuple(Callable, bool) diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py index cefe7838..6a832ac5 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py @@ -16,7 +16,7 @@ """ # Standard from copy import deepcopy -from typing import Callable, Tuple +from typing import Callable, Tuple, Union # Third Party from transformers import AutoModelForCausalLM @@ -52,6 +52,7 @@ def build_task_tokenize_function( max_source_length: int, max_target_length: int, verbalizer: str, + task_ids: Union[None, int] = None, ) -> Tuple[Callable, bool]: """Builds tokenizer functions which can be mapped over train streams to process data which can then be easily passed to a DataLoader for CausalLM models. @@ -66,6 +67,10 @@ def build_task_tokenize_function( verbalizer: str Verbalizer template to be used for formatting data. This template may use brackets to indicate where fields from the data model TrainGenerationRecord must be rendered. + task_ids: Union[None, int] + Task id corresponding particular task for multi-task prompt tuning. + NOTE: Only required for MPT (Multi-task prompt tuning) + Default: None Returns: Tuple(Callable, bool) @@ -104,8 +109,9 @@ def tokenize_function_language_model( # Here, we need to yield and manipulate the attention mask to attend # to the input seq + the tokens we have seen so far... num_target_samples = len(target_ids.input_ids) - # TODO: Why do we need task ids here?? - # source_ids["task_ids"] = 0 + + if task_ids is not None: + source_ids["task_ids"] = task_ids def generator_func(): for idx in range(num_target_samples): diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py index 6951f5fb..880b6db1 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py @@ -134,6 +134,7 @@ def build_task_tokenize_function( max_source_length: int, max_target_length: int, verbalizer: str, + task_ids: Union[None, int] = None, ) -> Tuple[Callable, bool]: """Builds tokenizer functions which can be mapped over train streams to process data which can then be easily passed to a DataLoader for seq2seq models. @@ -148,6 +149,10 @@ def build_task_tokenize_function( verbalizer: str Verbalizer template to be used for formatting data. This template may use brackets to indicate where fields from the data model TrainGenerationRecord must be rendered. + task_ids: Union[None, int] + Task id corresponding particular task for multi-task prompt tuning. + NOTE: Only required for MPT (Multi-task prompt tuning) + Default: None Returns: Tuple(Callable, bool) @@ -194,8 +199,9 @@ def tokenize_function_seq2seq( map(lambda x: IGNORE_ID if x == tokenizer.pad_token_id else x, labels) ) model_inputs["labels"] = labels - # TODO: Why do we need task ids here?? 
- # model_inputs["task_ids"] = 0 + if task_ids is not None: + model_inputs["task_ids"] = task_ids + return model_inputs return (tokenize_function_seq2seq, False) diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 2cc4f0b7..ffc8f187 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -32,6 +32,19 @@ SEQ2SEQ_LM_MODEL = os.path.join(TINY_MODELS_DIR, "T5ForConditionalGeneration") +@pytest.fixture() +def set_cpu_device(request): + """Fixture to set default cuda device. + This fixture is particularly useful for running the unit tests where + cuda devices are available, in which case, some transformers function + may try to consume cuda and give device mismatch error. + """ + visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "") + os.environ["CUDA_VISIBLE_DEVICES"] = "" + with mock.patch.object(torch.cuda, 'is_available', return_value=False): + yield + os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices + @pytest.fixture def disable_wip(request): """Fixture to temporarily disable wip decorator""" diff --git a/tests/modules/text_generation/test_fine_tuning.py b/tests/modules/text_generation/test_fine_tuning.py index 64974425..458e8794 100644 --- a/tests/modules/text_generation/test_fine_tuning.py +++ b/tests/modules/text_generation/test_fine_tuning.py @@ -11,10 +11,10 @@ from caikit_nlp.data_model import GenerationTrainRecord from caikit_nlp.modules.text_generation import FineTuning from caikit_nlp.resources.pretrained_model import HFAutoCausalLM, HFAutoSeq2SeqLM -from tests.fixtures import CAUSAL_LM_MODEL, SEQ2SEQ_LM_MODEL, disable_wip +from tests.fixtures import CAUSAL_LM_MODEL, SEQ2SEQ_LM_MODEL, disable_wip, set_cpu_device -def test_train_model_seq2seq(disable_wip): +def test_train_model_seq2seq(disable_wip, set_cpu_device): """Ensure that we can finetune a seq2seq model on some toy data for 1+ steps & run inference.""" train_kwargs = { @@ -41,7 +41,7 @@ def test_train_model_seq2seq(disable_wip): assert isinstance(pred, GeneratedTextResult) -def test_train_model_causallm(disable_wip): +def test_train_model_causallm(disable_wip, set_cpu_device): """Ensure that we can finetune a causal-lm model on some toy data for 1+ steps & run inference.""" train_kwargs = { diff --git a/tests/modules/text_generation/test_peft_prompt_tuning.py b/tests/modules/text_generation/test_peft_prompt_tuning.py index 8ce87ff2..22361b9d 100644 --- a/tests/modules/text_generation/test_peft_prompt_tuning.py +++ b/tests/modules/text_generation/test_peft_prompt_tuning.py @@ -28,6 +28,7 @@ from tests.fixtures import ( causal_lm_dummy_model, causal_lm_train_kwargs, + set_cpu_device, seq2seq_lm_dummy_model, seq2seq_lm_train_kwargs, ) @@ -36,8 +37,9 @@ # Indexes into the peft config dictionary to get the actual prompt tuning config DEFAULT_ADAPTER = "default" + ### Tests validating block interfaces and behavior -def test_save_and_reload_with_base_model(causal_lm_dummy_model): +def test_save_and_reload_with_base_model(causal_lm_dummy_model, set_cpu_device): """Ensure that we can save a model + its base to a tempdir and reload it.""" with tempfile.TemporaryDirectory() as model_dir: causal_lm_dummy_model.save(model_dir, save_base_model=True) @@ -109,7 +111,7 @@ def test_verbalizer_cannot_be_static(causal_lm_train_kwargs): ) -def test_train_model(causal_lm_train_kwargs): +def test_train_model(causal_lm_train_kwargs, set_cpu_device): """Ensure that we can train a model on some toy data for 1+ steps & run inference.""" patch_kwargs = { "num_epochs": 1, @@ -138,7 +140,7 @@ def 
test_train_model(causal_lm_train_kwargs): assert isinstance(pred, GeneratedTextResult) -def test_train_model_classification_record(causal_lm_train_kwargs): +def test_train_model_classification_record(causal_lm_train_kwargs, set_cpu_device): """Ensure that we can train a model on some toy data for 1+ steps & run inference.""" patch_kwargs = { "num_epochs": 1, diff --git a/tests/resources/test_pretrained_model.py b/tests/resources/test_pretrained_model.py index 0b377e28..04d5062e 100644 --- a/tests/resources/test_pretrained_model.py +++ b/tests/resources/test_pretrained_model.py @@ -128,6 +128,7 @@ def test_causal_lm_tok_output_correctness(models_cache_dir): max_source_length=100, max_target_length=100, verbalizer="{{input}}", + task_ids=0 ) input_tok = causal_lm.tokenizer.encode(sample.input) output_tok = causal_lm.tokenizer.encode(sample.output) @@ -170,6 +171,7 @@ def test_seq2seq_tokenize_func_contains_unwrapped_stream(models_cache_dir): max_source_length=100, max_target_length=100, verbalizer="{{input}}", + task_ids=0, ) tok_res = tok_func(GenerationTrainRecord(input="hello", output="world")) map_stream = SAMPLE_TRAINING_DATA.map(tok_func) @@ -195,6 +197,7 @@ def test_seq2seq_tok_output_correctness(models_cache_dir): max_source_length=20, max_target_length=20, verbalizer="{{input}}", + task_ids=0, ) input_tok = seq2seq.tokenizer.encode(sample.input) output_tok = seq2seq.tokenizer.encode(sample.output) From 0c2df95fcee982c8771315fc77c21107644117cc Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Sun, 30 Jul 2023 14:19:25 -0500 Subject: [PATCH 08/16] :art: Fix formatting Signed-off-by: gkumbhat --- caikit_nlp/modules/text_generation/peft_prompt_tuning.py | 4 ++-- tests/fixtures/__init__.py | 3 ++- tests/modules/text_generation/test_fine_tuning.py | 7 ++++++- tests/modules/text_generation/test_peft_prompt_tuning.py | 2 +- tests/resources/test_pretrained_model.py | 2 +- 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/caikit_nlp/modules/text_generation/peft_prompt_tuning.py b/caikit_nlp/modules/text_generation/peft_prompt_tuning.py index 60c6efcc..4f4c67a7 100644 --- a/caikit_nlp/modules/text_generation/peft_prompt_tuning.py +++ b/caikit_nlp/modules/text_generation/peft_prompt_tuning.py @@ -606,7 +606,7 @@ def load( model_path: str, torch_dtype: str = None, device: str = _DETECT_DEVICE, # TODO: Union[int, str] - ) -> "PeftPromptTuning": + ) -> "PeftPromptTuning": """Load a PEFT prompt tuning model. This method will currently fail if the original model was not saved with the arg value save_base_model=True. 
@@ -1070,7 +1070,7 @@ def _execute_train_loop( # Configure accelerator for gradient accumulation accelerator_args = { "gradient_accumulation_steps": accumulate_steps, - "device_placement": True + "device_placement": True, } accelerator = Accelerator(**accelerator_args) diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index ffc8f187..324cdee3 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -41,10 +41,11 @@ def set_cpu_device(request): """ visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "") os.environ["CUDA_VISIBLE_DEVICES"] = "" - with mock.patch.object(torch.cuda, 'is_available', return_value=False): + with mock.patch.object(torch.cuda, "is_available", return_value=False): yield os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices + @pytest.fixture def disable_wip(request): """Fixture to temporarily disable wip decorator""" diff --git a/tests/modules/text_generation/test_fine_tuning.py b/tests/modules/text_generation/test_fine_tuning.py index 458e8794..d3c42741 100644 --- a/tests/modules/text_generation/test_fine_tuning.py +++ b/tests/modules/text_generation/test_fine_tuning.py @@ -11,7 +11,12 @@ from caikit_nlp.data_model import GenerationTrainRecord from caikit_nlp.modules.text_generation import FineTuning from caikit_nlp.resources.pretrained_model import HFAutoCausalLM, HFAutoSeq2SeqLM -from tests.fixtures import CAUSAL_LM_MODEL, SEQ2SEQ_LM_MODEL, disable_wip, set_cpu_device +from tests.fixtures import ( + CAUSAL_LM_MODEL, + SEQ2SEQ_LM_MODEL, + disable_wip, + set_cpu_device, +) def test_train_model_seq2seq(disable_wip, set_cpu_device): diff --git a/tests/modules/text_generation/test_peft_prompt_tuning.py b/tests/modules/text_generation/test_peft_prompt_tuning.py index 22361b9d..907338d0 100644 --- a/tests/modules/text_generation/test_peft_prompt_tuning.py +++ b/tests/modules/text_generation/test_peft_prompt_tuning.py @@ -28,9 +28,9 @@ from tests.fixtures import ( causal_lm_dummy_model, causal_lm_train_kwargs, - set_cpu_device, seq2seq_lm_dummy_model, seq2seq_lm_train_kwargs, + set_cpu_device, ) import caikit_nlp diff --git a/tests/resources/test_pretrained_model.py b/tests/resources/test_pretrained_model.py index 04d5062e..d7e5a748 100644 --- a/tests/resources/test_pretrained_model.py +++ b/tests/resources/test_pretrained_model.py @@ -128,7 +128,7 @@ def test_causal_lm_tok_output_correctness(models_cache_dir): max_source_length=100, max_target_length=100, verbalizer="{{input}}", - task_ids=0 + task_ids=0, ) input_tok = causal_lm.tokenizer.encode(sample.input) output_tok = causal_lm.tokenizer.encode(sample.output) From 9230f4e7c665be8034a94897344918a007cbd45d Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Sun, 30 Jul 2023 18:24:58 -0500 Subject: [PATCH 09/16] :white_check_mark::sparkles: Make fine-tuning work for causal lm models and make tests pass Signed-off-by: gkumbhat --- .../modules/text_generation/fine_tuning.py | 28 ++++++++++++----- .../pretrained_model/hf_auto_causal_lm.py | 31 ++++++++++++++++++- .../text_generation/test_fine_tuning.py | 6 ++-- 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index 38e907df..0f50bd68 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -15,7 +15,7 @@ # Third Party from torch.utils.data import IterableDataset -from transformers import AutoConfig, AutoTokenizer, Trainer +from transformers import AutoConfig, 
AutoTokenizer, Seq2SeqTrainer, Trainer import torch # First Party @@ -228,13 +228,27 @@ def run( # and thus the device placement be according to training strategy, # its better to let Trainer handle the evaluation / prediction - # TODO: Add support for passing extra arguments to prediction_step + generate_args = { + "prediction_loss_only": False, + } + if isinstance(self.model, Seq2SeqTrainer): + generate_args["max_new_tokens"] = max_new_tokens + generate_args["min_new_tokens"] = min_new_tokens + else: + # NOTE: Currently the default trainer doesn't support easy way to run individual + # samples without converting them into Datasets etc. There is a + # predict_with_generate flag, but it doesn't do anything. + # Applicable for transformers==4.31.0 + error( + "", + NotImplementedError( + f"Generation on {type(self.model)} not support \ + currently! Please try saving and running this model in TGIS." + ), + ) + _, generated_tokens, _ = self.model.prediction_step( - self.model.model, - tok_tensors, - prediction_loss_only=False, - max_new_tokens=max_new_tokens, - min_new_tokens=min_new_tokens, + self.model.model, tok_tensors, **generate_args ) generated_text = self.tokenizer.batch_decode( diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py index 6a832ac5..30c0be20 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_causal_lm.py @@ -19,7 +19,7 @@ from typing import Callable, Tuple, Union # Third Party -from transformers import AutoModelForCausalLM +from transformers import AutoModelForCausalLM, DataCollatorForLanguageModeling from transformers.models.auto import modeling_auto # First Party @@ -129,3 +129,32 @@ def generator_func(): return DataStream(generator_func) return (tokenize_function_language_model, True) + + def _get_data_collator(self, **kwargs): + """Function to return appropriate data collator based on resource. + + DataCollatorForLanguageModeling is used here which will dynamically + padded to maximum length of a batch if they are not all of the same + length. + + NOTE: If mlm (masked language modeling) is not passed in kwargs, + this function will automatically set it to `False`. + + Args: + **kwargs: + All the keyword arguments passed to this function + will get filtered out to appropriate ones that are + applicable to implemented data collator. 
+ Returns: + transformers.DataCollator + """ + + applicable_args = ["mlm", "pad_to_multiple_of"] + collator_kwargs = {key: kwargs[key] for key in applicable_args if key in kwargs} + + if "mlm" not in collator_kwargs: + collator_kwargs["mlm"] = False + + return DataCollatorForLanguageModeling( + tokenizer=self._tokenizer, return_tensors="pt", **collator_kwargs + ) diff --git a/tests/modules/text_generation/test_fine_tuning.py b/tests/modules/text_generation/test_fine_tuning.py index d3c42741..64060961 100644 --- a/tests/modules/text_generation/test_fine_tuning.py +++ b/tests/modules/text_generation/test_fine_tuning.py @@ -65,9 +65,9 @@ def test_train_model_causallm(disable_wip, set_cpu_device): } model = FineTuning.train(**train_kwargs) assert isinstance(model.model, Trainer) - # Ensure that we can get something out of it - pred = model.run("@bar what a cute cat!") - assert isinstance(pred, GeneratedTextResult) + + with pytest.raises(NotImplementedError): + model.run("@bar what a cute cat!") ############################## Error Cases ################################ From f46897389966a02ebda5927f97dbaf797e70c747 Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Tue, 1 Aug 2023 12:48:49 -0500 Subject: [PATCH 10/16] :wrench: Make review changes and add docstring for arguments Signed-off-by: gkumbhat --- .../modules/text_generation/fine_tuning.py | 37 ++++++++++++++++++- .../text_generation/peft_prompt_tuning.py | 15 ++++---- caikit_nlp/resources/pretrained_model/base.py | 4 ++ .../pretrained_model/hf_auto_seq2seq_lm.py | 4 ++ scripts/dump_apis.sh | 14 +++---- 5 files changed, 59 insertions(+), 15 deletions(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index 0f50bd68..5ed10c96 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -78,7 +78,42 @@ def train( **training_arguments, ): """ - # FIXME: Below is currently configured for Seq2Seq only + Fine-tune a CausalLM or Seq2seq text generation model. + + Args: + base_model: Union[str, caikit_nlp.resources.pretrained_model.base.PretrainedModelBase] + Base resource model used for underlying generation. + train_stream: DataStream[GenerationTrainRecord] or DataStream[ClassificationTrainRecord] + Data to be used for training the prompt vectors of the generation model. + torch_dtype: str + TODO: Optional[Union[torch.dtype, str]] + Data type to use for training/inference of the underlying text generation model. + If no value is provided, we pull from torch_dtype in config. If an in memory + resource is provided which does not match the specified data type, the model + underpinning the resource will be converted in place to the correct torch dtype. + max_source_length: int + Max length of input sequences being considered. Default: 256. + max_target_length: int + Max length of target sequences being predicted. Default: 128. + batch_size: int + Batch sized to be used for training / evaluation data. Default: 8. + num_epochs: int + Number of epochs to tune the model. Default: 20. + accumulate_steps: int + Number of steps to use for gradient accumulation. Default: 1. + lr: float + Learning rate to be used while tuning model. Default: 2e-5. + checkpoint_dir: str + Directory where model predictions and checkpoints will be written + **training_arguments: + Arguments supported by HF Training Arguments. 
+ TrainingArguments: + https://huggingface.co/docs/transformers/v4.30.0/en/main_classes/trainer#transformers.TrainingArguments + Seq2SeqTrainingArguments: + https://huggingface.co/docs/transformers/v4.30.0/en/main_classes/trainer#transformers.Seq2SeqTrainingArguments + Returns: + FineTuning + Instance of this class with fine-tuned models. """ torch_dtype = get_torch_dtype(torch_dtype) diff --git a/caikit_nlp/modules/text_generation/peft_prompt_tuning.py b/caikit_nlp/modules/text_generation/peft_prompt_tuning.py index 4f4c67a7..2f4049c8 100644 --- a/caikit_nlp/modules/text_generation/peft_prompt_tuning.py +++ b/caikit_nlp/modules/text_generation/peft_prompt_tuning.py @@ -173,6 +173,7 @@ def __del__(self): def run( self, text: str, + device: Optional[Union[str, int]] = _DETECT_DEVICE, max_new_tokens=20, min_new_tokens=0, ) -> GeneratedTextResult: @@ -181,6 +182,8 @@ def run( Args: text: str Input string to be used to the generation model. + device: Optional[Union[str, int]] + Device on which we should run inference; by default, we use the detected device. max_new_tokens: int The maximum numbers of tokens to generate. Default: 20 @@ -197,7 +200,8 @@ def run( # Apply the tokenizer to the sample text & move to correct device tok_tensors = self.tokenizer(verbalized_text, return_tensors="pt") - inputs = {k: v.to(self.model.device) for k, v in tok_tensors.items()} + device = PeftPromptTuning._get_device(device) + inputs = {k: v.to(device) for k, v in tok_tensors.items()} with torch.no_grad(): # Run tokenized tensors through the rest of the PEFT model outputs = self.model.generate( @@ -1067,13 +1071,10 @@ def _execute_train_loop( num_warmup_steps=0, num_training_steps=(len(train_dataloader) * num_epochs), ) - # Configure accelerator for gradient accumulation - accelerator_args = { - "gradient_accumulation_steps": accumulate_steps, - "device_placement": True, - } - accelerator = Accelerator(**accelerator_args) + accelerator = Accelerator( + gradient_accumulation_steps=accumulate_steps, device_placement=True + ) for epoch in range(num_epochs): model.train() diff --git a/caikit_nlp/resources/pretrained_model/base.py b/caikit_nlp/resources/pretrained_model/base.py index e4a732b2..87491917 100644 --- a/caikit_nlp/resources/pretrained_model/base.py +++ b/caikit_nlp/resources/pretrained_model/base.py @@ -247,6 +247,10 @@ def get_trainer( **kwargs, ): """ + Args: + *kwargs: arguments supported by HF TrainingArguments: + https://huggingface.co/docs/transformers/v4.30.0/en/main_classes/trainer#transformers.TrainingArguments + NOTE: following parameters are not supported currently: 1. model_init 2. compute_metrics diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py index 880b6db1..fdea36d8 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py @@ -82,6 +82,10 @@ def get_trainer( **kwargs ): """ + Args: + *kwargs: arguments supported by HF Seq2SeqTrainingArguments: + https://huggingface.co/docs/transformers/v4.30.0/en/main_classes/trainer#transformers.Seq2SeqTrainingArguments + NOTE: following parameters are not supported currently: 1. model_init 2. 
compute_metrics diff --git a/scripts/dump_apis.sh b/scripts/dump_apis.sh index f277a827..249704f4 100755 --- a/scripts/dump_apis.sh +++ b/scripts/dump_apis.sh @@ -1,18 +1,18 @@ #!/usr/bin/env bash # Make a directory with interfaces -http_interface_dir="generated_interfaces/http" -grpc_interface_dir="generated_interfaces/grpc" +http_interface_dir="temp_dump/http" +grpc_interface_dir="temp_dump/grpc" mkdir -p $http_interface_dir mkdir -p $grpc_interface_dir # Run the HTTP server in the background -RUNTIME_LIBRARY=caikit_nlp python -m caikit.runtime.http_server & -http_pid=$! +# RUNTIME_LIBRARY=caikit_nlp python -m caikit.runtime.http_server & +# http_pid=$! -# Sleep for a bit and then call it to get the swagger doc -sleep 5 -curl http://localhost:8080/openapi.json | jq > $http_interface_dir/openapi.json +# # Sleep for a bit and then call it to get the swagger doc +# sleep 5 +# curl http://localhost:8080/openapi.json | jq > $http_interface_dir/openapi.json # Kill the HTTP server and wait for it to die kill -9 $http_pid From 4eda03b3a655d9688304b4981b07e5ef2355389e Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Tue, 1 Aug 2023 14:10:44 -0500 Subject: [PATCH 11/16] :sparkles: Add support for model.generate right after training by saving and loading the model Signed-off-by: gkumbhat --- .../modules/text_generation/fine_tuning.py | 126 +++++++++--------- .../text_generation/test_fine_tuning.py | 9 +- 2 files changed, 70 insertions(+), 65 deletions(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index 5ed10c96..0a70866f 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Standard +from typing import Optional # Third Party from torch.utils.data import IterableDataset @@ -28,7 +30,7 @@ # Local from ...data_model import GenerationTrainRecord -from ...resources.pretrained_model.base import PretrainedModelBase +from ...resources.pretrained_model import PretrainedModelBase, HFAutoCausalLM, HFAutoSeq2SeqLM from ...toolkit.data_stream_wrapper import SimpleIterableStreamWrapper from ...toolkit.data_type_utils import get_torch_dtype @@ -49,17 +51,25 @@ class FineTuning(ModuleBase): """Module to provide fine-tuning support for text generation task""" - def __init__(self, tokenizer, model): + supported_resources = [HFAutoCausalLM, HFAutoSeq2SeqLM] + + def __init__( + self, + tokenizer, + model, + bos_token: Optional[str] = None, + sep_token: Optional[str] = None, + eos_token: Optional[str] = None, + pad_token: Optional[str] = None, + ): super().__init__() self.tokenizer = tokenizer - # NOTE: self.model here can also be HF trainer. This is because - # if we have just trained the model then the models weights might be - # available in different devices (and configuration), depending on - # how it was trained. For now (July 10, 2023), we are not trying to - # extract the model out from trainer itself, since that would require - # us to essentially save it or reconstruct it to do normal inferring. self.model = model + self._bos_token = bos_token + self._sep_token = sep_token + self._eos_token = eos_token + self._pad_token = pad_token @classmethod def train( @@ -122,11 +132,12 @@ def train( # text_generation module. 
In future, we would want to consolidate this into # a base class or a toolkit function # pylint: disable=duplicate-code + resource_type = None + ## Load base model if isinstance(base_model, str): model_config = AutoConfig.from_pretrained(base_model) - resource_type = None for resource in cls.supported_resources: if model_config.model_type in resource.SUPPORTED_MODEL_TYPES: resource_type = resource @@ -140,8 +151,13 @@ def train( ), ) log.debug("Bootstrapping base resource [%s]", base_model) + breakpoint() base_model = resource_type.bootstrap(base_model, torch_dtype=torch_dtype) + else: + # base_model is actually a resource object + resource_type = type(base_model) + error.type_check("", PretrainedModelBase, base_model=base_model) ## Generate data loader from stream training_dataset: IterableDataset = cls._preprocess_function( @@ -217,17 +233,23 @@ def train( # Start training via Trainer.train function trainer.train() - # NOTE: By default the model would be available in different ways - # depending on where and how it was trained. So we need to fetch the model - # from the trainer depending on the training method, like fsdp, ddp etc. - # For simplicity, currently we will use trainer as the model since it anyways - # enable the `predict` function on it and has all the layers of the model - # distributed already, so it will be most optimized to use trainer to - # perform prediction at this stage. + + # save the model temporarily and reload it + # this is done, since otherwise the model might be distributed in different + # devices, in which case its better to use trainer's `prediction_step` + # functions, but then, they don't always give API similar to `generate` + # and thus cause incompatibilities in `run` function + trainer.save_model(checkpoint_dir) + + model = resource_type.bootstrap(checkpoint_dir, checkpoint_dir, torch_dtype=torch_dtype) return cls( - tokenizer=base_model.tokenizer, - model=trainer, + tokenizer=model.tokenizer, + model=model, + bos_token=model.tokenizer.bos_token or None, + sep_token=model.tokenizer.sep_token or None, + eos_token=model.tokenizer.eos_token or None, + pad_token=model.tokenizer.pad_token or None, ) # pylint: disable=unused-argument @@ -252,53 +274,35 @@ def run( GeneratedTextResult Generated text result """ - if isinstance(self.model, Trainer): - # Apply the tokenizer to the sample text & move to correct device - tok_tensors = self.tokenizer(text, return_tensors="pt") - # NOTE: below function is prediction on trainer, for which we need to supply - # the actual underlying model as well - # NOTE: We are using prediction_step instead of calling `self.model.generate` - # because this way HF Trainer automatically handles device placement of the - # data and model. Since the model is with Trainer at this point - # and thus the device placement be according to training strategy, - # its better to let Trainer handle the evaluation / prediction - - generate_args = { - "prediction_loss_only": False, - } - if isinstance(self.model, Seq2SeqTrainer): - generate_args["max_new_tokens"] = max_new_tokens - generate_args["min_new_tokens"] = min_new_tokens - else: - # NOTE: Currently the default trainer doesn't support easy way to run individual - # samples without converting them into Datasets etc. There is a - # predict_with_generate flag, but it doesn't do anything. - # Applicable for transformers==4.31.0 - error( - "", - NotImplementedError( - f"Generation on {type(self.model)} not support \ - currently! Please try saving and running this model in TGIS." 
- ), - ) - - _, generated_tokens, _ = self.model.prediction_step( - self.model.model, tok_tensors, **generate_args - ) - generated_text = self.tokenizer.batch_decode( - generated_tokens.detach().cpu().numpy(), skip_special_tokens=True - )[0] + inputs = self.model.tokenizer(text, return_tensors="pt") + generate_ids = self.model.model.generate( + input_ids=inputs["input_ids"], + max_new_tokens=max_new_tokens, + min_new_tokens=min_new_tokens, + use_cache=True, + ) - else: - error( - "", - NotImplementedError( - "model prediction on pre-finetuned model currently not supported" - ), + token_count = generate_ids.size(1) - 1 + preds = [ + self.model.tokenizer.decode( + g, skip_special_tokens=True, clean_up_tokenization_spaces=True ) + for g in generate_ids + ] + if generate_ids[0][-1].item() == self._eos_token: + finish_reason = "EOS_TOKEN" + elif generate_ids.size(1) - 1 == max_new_tokens: + finish_reason = "MAX_TOKENS" + else: + finish_reason = "OTHER" - return GeneratedTextResult(generated_text=generated_text) + return GeneratedTextResult( + generated_tokens=token_count, + generated_text=preds[0], + finish_reason=finish_reason, + producer_id=self.PRODUCER_ID, + ) ################################## Private Functions ########################################### diff --git a/tests/modules/text_generation/test_fine_tuning.py b/tests/modules/text_generation/test_fine_tuning.py index 64060961..a17f5ffa 100644 --- a/tests/modules/text_generation/test_fine_tuning.py +++ b/tests/modules/text_generation/test_fine_tuning.py @@ -40,7 +40,7 @@ def test_train_model_seq2seq(disable_wip, set_cpu_device): "torch_dtype": torch.float32, } model = FineTuning.train(**train_kwargs) - assert isinstance(model.model, Trainer) + assert isinstance(model.model, HFAutoSeq2SeqLM) # Ensure that we can get something out of it pred = model.run("@bar what a cute cat!") assert isinstance(pred, GeneratedTextResult) @@ -64,10 +64,11 @@ def test_train_model_causallm(disable_wip, set_cpu_device): "torch_dtype": torch.float32, } model = FineTuning.train(**train_kwargs) - assert isinstance(model.model, Trainer) + assert isinstance(model.model, HFAutoCausalLM) - with pytest.raises(NotImplementedError): - model.run("@bar what a cute cat!") + # Ensure that we can get something out of it + pred = model.run("@bar what a cute cat!") + assert isinstance(pred, GeneratedTextResult) ############################## Error Cases ################################ From ed7bbe639a197b309ff52b6c28076f4fdc7cf996 Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Tue, 1 Aug 2023 14:11:46 -0500 Subject: [PATCH 12/16] :art: Fix formatting and linting Signed-off-by: gkumbhat --- .../modules/text_generation/fine_tuning.py | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index 0a70866f..dab6a596 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -17,7 +17,7 @@ # Third Party from torch.utils.data import IterableDataset -from transformers import AutoConfig, AutoTokenizer, Seq2SeqTrainer, Trainer +from transformers import AutoConfig, AutoTokenizer import torch # First Party @@ -30,7 +30,11 @@ # Local from ...data_model import GenerationTrainRecord -from ...resources.pretrained_model import PretrainedModelBase, HFAutoCausalLM, HFAutoSeq2SeqLM +from ...resources.pretrained_model import ( + HFAutoCausalLM, + HFAutoSeq2SeqLM, + PretrainedModelBase, +) from 
...toolkit.data_stream_wrapper import SimpleIterableStreamWrapper from ...toolkit.data_type_utils import get_torch_dtype @@ -54,14 +58,14 @@ class FineTuning(ModuleBase): supported_resources = [HFAutoCausalLM, HFAutoSeq2SeqLM] def __init__( - self, - tokenizer, - model, - bos_token: Optional[str] = None, - sep_token: Optional[str] = None, - eos_token: Optional[str] = None, - pad_token: Optional[str] = None, - ): + self, + tokenizer, + model, + bos_token: Optional[str] = None, + sep_token: Optional[str] = None, + eos_token: Optional[str] = None, + pad_token: Optional[str] = None, + ): super().__init__() self.tokenizer = tokenizer @@ -151,7 +155,6 @@ def train( ), ) log.debug("Bootstrapping base resource [%s]", base_model) - breakpoint() base_model = resource_type.bootstrap(base_model, torch_dtype=torch_dtype) else: @@ -241,7 +244,9 @@ def train( # and thus cause incompatibilities in `run` function trainer.save_model(checkpoint_dir) - model = resource_type.bootstrap(checkpoint_dir, checkpoint_dir, torch_dtype=torch_dtype) + model = resource_type.bootstrap( + checkpoint_dir, checkpoint_dir, torch_dtype=torch_dtype + ) return cls( tokenizer=model.tokenizer, From 489076146150190fc0c4172065690f7c5bcf7e43 Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Tue, 1 Aug 2023 15:04:31 -0500 Subject: [PATCH 13/16] :bug: Fix default verbalizer declaration in fine_tuning module Signed-off-by: gkumbhat --- caikit_nlp/modules/text_generation/fine_tuning.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index dab6a596..2ea8561f 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -322,11 +322,14 @@ def _preprocess_function( ): """Pre-process each example to get it prepared for training.""" + # TODO: We are using a default verbalizer which is strictly tied to + # source training record currently. 
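Taken together, the flow this patch series lands on is: persist the trainer's weights, bootstrap a fresh resource from that checkpoint, and call plain `generate` on the reloaded model. A condensed sketch of that flow with the finish-reason bookkeeping folded in; the resource API and decode arguments follow the diffs, while comparing the last generated id against `tokenizer.eos_token_id` is an assumption here, since `generate` returns token ids rather than token strings:

def reload_and_generate(
    trainer, resource_type, checkpoint_dir, torch_dtype, text, max_new_tokens=20
):
    # Persist the fine-tuned weights, then reload them as a regular resource so
    # that model.generate is usable again even if the trainer had distributed
    # the in-memory weights across devices.
    trainer.save_model(checkpoint_dir)
    model = resource_type.bootstrap(
        checkpoint_dir, checkpoint_dir, torch_dtype=torch_dtype
    )

    inputs = model.tokenizer(text, return_tensors="pt")
    generate_ids = model.model.generate(
        input_ids=inputs["input_ids"],
        max_new_tokens=max_new_tokens,
        min_new_tokens=0,
        use_cache=True,
    )
    decoded = model.tokenizer.decode(
        generate_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True
    )

    # Finish-reason logic, using the id-based EOS comparison noted above.
    if generate_ids[0, -1].item() == model.tokenizer.eos_token_id:
        finish_reason = "EOS_TOKEN"
    elif generate_ids.size(1) - 1 == max_new_tokens:
        finish_reason = "MAX_TOKENS"
    else:
        finish_reason = "OTHER"
    return decoded, finish_reason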
We need to figure out a better + # way to make verbalizer optional for build_task_tokenize_function ( tokenize_function, requires_unwrapping, ) = base_model.build_task_tokenize_function( - tokenizer, max_source_length, max_target_length, verbalizer="" + tokenizer, max_source_length, max_target_length, verbalizer="{{input}}" ) mapped_stream = train_stream.map(tokenize_function) if requires_unwrapping: From d3d962c35b6629b57848242805b636cfed62e20f Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Tue, 1 Aug 2023 17:26:43 -0500 Subject: [PATCH 14/16] :wrench: Update parameters for trainer and add random seed Signed-off-by: gkumbhat --- caikit_nlp/modules/text_generation/fine_tuning.py | 4 ++++ caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index 2ea8561f..3c277bc0 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -55,6 +55,7 @@ class FineTuning(ModuleBase): """Module to provide fine-tuning support for text generation task""" + RANDOM_SEED = 73 supported_resources = [HFAutoCausalLM, HFAutoSeq2SeqLM] def __init__( @@ -86,6 +87,7 @@ def train( batch_size: int = 8, num_epochs: int = 5, accumulate_steps: int = 32, + random_seed: int = RANDOM_SEED, lr: float = 2e-5, # Directory where model predictions and checkpoints will be written checkpoint_dir: str = "/tmp", @@ -201,6 +203,7 @@ def train( "per_device_train_batch_size": batch_size, "per_device_eval_batch_size": batch_size, "num_train_epochs": num_epochs, + "seed": random_seed, # NOTE: We have disabled evaluation for now "do_eval": False, # "evaluation_strategy ": "epoch", @@ -214,6 +217,7 @@ def train( "gradient_accumulation_steps": accumulate_steps, "eval_accumulation_steps": accumulate_steps, # eval_steps=1, + # load_best_model_at_end **training_arguments, **dtype_based_params, } diff --git a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py index fdea36d8..bdd69aa1 100644 --- a/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py +++ b/caikit_nlp/resources/pretrained_model/hf_auto_seq2seq_lm.py @@ -93,7 +93,8 @@ def get_trainer( 4. preprocess_logits_for_metrics """ - training_args = Seq2SeqTrainingArguments(predict_with_generate=True, **kwargs) + # NOTE: predict_with_generate is incompatible with fsdp + training_args = Seq2SeqTrainingArguments(**kwargs) # pylint: disable=duplicate-code # TODO: Fetch DataCollator either from property of this From f84a357097444d439435c4a11126356e0a0eef78 Mon Sep 17 00:00:00 2001 From: gkumbhat Date: Wed, 2 Aug 2023 17:45:38 -0500 Subject: [PATCH 15/16] :rewind: Revert back dump_api script changes Signed-off-by: gkumbhat --- scripts/dump_apis.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/dump_apis.sh b/scripts/dump_apis.sh index 249704f4..f277a827 100755 --- a/scripts/dump_apis.sh +++ b/scripts/dump_apis.sh @@ -1,18 +1,18 @@ #!/usr/bin/env bash # Make a directory with interfaces -http_interface_dir="temp_dump/http" -grpc_interface_dir="temp_dump/grpc" +http_interface_dir="generated_interfaces/http" +grpc_interface_dir="generated_interfaces/grpc" mkdir -p $http_interface_dir mkdir -p $grpc_interface_dir # Run the HTTP server in the background -# RUNTIME_LIBRARY=caikit_nlp python -m caikit.runtime.http_server & -# http_pid=$! 
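On the training-arguments side, the preceding patch mainly adds a fixed default seed (73, overridable through `random_seed`) and keeps routing everything through a plain dict before it reaches the HF `TrainingArguments` / `Seq2SeqTrainingArguments` constructor. Because the caller-supplied `**training_arguments` entries are unpacked after the defaults in that dict literal, they win on any key collision. A small self-contained illustration, with placeholder values:

# Defaults as assembled in train(), trimmed to a few representative keys.
defaults = {
    "output_dir": "/tmp",
    "num_train_epochs": 5,
    "seed": 73,
    "weight_decay": 0.01,
}

# Extra kwargs a caller forwards via **training_arguments.
overrides = {"weight_decay": 0.0, "logging_steps": 10}

# Later entries win, so the caller's weight_decay replaces the default while
# the seeded default stays in place.
training_args = {**defaults, **overrides}
assert training_args["weight_decay"] == 0.0
assert training_args["seed"] == 73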
+RUNTIME_LIBRARY=caikit_nlp python -m caikit.runtime.http_server & +http_pid=$! -# # Sleep for a bit and then call it to get the swagger doc -# sleep 5 -# curl http://localhost:8080/openapi.json | jq > $http_interface_dir/openapi.json +# Sleep for a bit and then call it to get the swagger doc +sleep 5 +curl http://localhost:8080/openapi.json | jq > $http_interface_dir/openapi.json # Kill the HTTP server and wait for it to die kill -9 $http_pid From 664a3d56b848f75a5147285a1caf2faf4e0a0ff6 Mon Sep 17 00:00:00 2001 From: Gaurav Kumbhat Date: Wed, 2 Aug 2023 17:48:31 -0500 Subject: [PATCH 16/16] Apply suggestions from code review Co-authored-by: Alex Brooks Signed-off-by: Gaurav Kumbhat Signed-off-by: gkumbhat --- caikit_nlp/modules/text_generation/fine_tuning.py | 2 +- caikit_nlp/resources/pretrained_model/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/caikit_nlp/modules/text_generation/fine_tuning.py b/caikit_nlp/modules/text_generation/fine_tuning.py index 3c277bc0..f3933516 100644 --- a/caikit_nlp/modules/text_generation/fine_tuning.py +++ b/caikit_nlp/modules/text_generation/fine_tuning.py @@ -100,7 +100,7 @@ def train( base_model: Union[str, caikit_nlp.resources.pretrained_model.base.PretrainedModelBase] Base resource model used for underlying generation. train_stream: DataStream[GenerationTrainRecord] or DataStream[ClassificationTrainRecord] - Data to be used for training the prompt vectors of the generation model. + Data to be used for fine-tuning the generation model. torch_dtype: str TODO: Optional[Union[torch.dtype, str]] Data type to use for training/inference of the underlying text generation model. diff --git a/caikit_nlp/resources/pretrained_model/base.py b/caikit_nlp/resources/pretrained_model/base.py index 87491917..59bb4d45 100644 --- a/caikit_nlp/resources/pretrained_model/base.py +++ b/caikit_nlp/resources/pretrained_model/base.py @@ -248,7 +248,7 @@ def get_trainer( ): """ Args: - *kwargs: arguments supported by HF TrainingArguments: + **kwargs: arguments supported by HF TrainingArguments: https://huggingface.co/docs/transformers/v4.30.0/en/main_classes/trainer#transformers.TrainingArguments NOTE: following parameters are not supported currently:
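Since `get_trainer` forwards `**kwargs` straight into the HF `TrainingArguments` (or `Seq2SeqTrainingArguments` for seq2seq resources), callers can tune any documented training knob without the module needing a dedicated parameter for each one. A hedged usage sketch follows; the resource class and the `get_trainer` call shape come from the diffs, while the model id, the `tokenized_dataset` variable, and the exact `bootstrap` signature are placeholders or assumptions:

from caikit_nlp.resources.pretrained_model import HFAutoSeq2SeqLM

# Placeholder checkpoint; any HF seq2seq model id could stand in here.
base_model = HFAutoSeq2SeqLM.bootstrap("google/flan-t5-small")

# tokenized_dataset is assumed to be an IterableDataset of already tokenized
# records, e.g. what the module's _preprocess_function produces.
trainer = base_model.get_trainer(
    train_dataset=tokenized_dataset,
    output_dir="/tmp/ft-demo",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    learning_rate=2e-5,
    seed=73,
)
trainer.train()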