@@ -1,30 +1,9 @@
-import gc
 from typing import List
 from datasets import Dataset
 from vllm import LLM, SamplingParams
-from utils import generate_prompt
+from utils import generate_prompt, cleanup
 
 
-def cleanup(model):
-    try:
-        import torch
-        import contextlib
-        if torch.cuda.is_available():
-            from vllm.distributed.parallel_state import (
-                destroy_model_parallel, destroy_distributed_environment
-            )
-            destroy_model_parallel()
-            destroy_distributed_environment()
-            del model.llm_engine.model_executor
-            del model
-            with contextlib.suppress(AssertionError):
-                torch.distributed.destroy_process_group()
-            gc.collect()
-            torch.cuda.empty_cache()
-            torch.cuda.synchronize()
-    except ImportError:
-        del model
-
 def generate_predictions(
     model_name: str, dataset: Dataset, temperature: float = 1.0, n: int = 1
 ) -> List[List[str]]:
@@ -62,8 +41,5 @@ def generate_predictions( |
     for output in outputs:
         generated_texts = [one.text for one in output.outputs]
         results.append(generated_texts)
-    cleanup(llm)
+    cleanup(llm, vllm=True)
     return results
-    # out_name = dataset_name.split("/")[-1]
-    # out_name = f"wentingzhao/{out_name}_predictions_{n}"
-    # ds.push_to_hub(out_name)
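
The body of the deleted helper suggests what the relocated utils.cleanup plausibly looks like. Below is a minimal sketch, assuming the new vllm keyword simply gates the vLLM/CUDA teardown path that previously lived in this file; the exact signature and defaults in utils.py are not shown in this diff and are an assumption.

# Hypothetical sketch of utils.cleanup -- not the actual utils.py source.
import contextlib
import gc


def cleanup(model, vllm: bool = False):
    """Release GPU memory held by `model`; tear down vLLM state when vllm=True."""
    try:
        import torch

        if vllm and torch.cuda.is_available():
            # Tear down vLLM's model-parallel and distributed state before
            # dropping the engine, so its CUDA allocations can be reclaimed.
            from vllm.distributed.parallel_state import (
                destroy_model_parallel,
                destroy_distributed_environment,
            )

            destroy_model_parallel()
            destroy_distributed_environment()
            del model.llm_engine.model_executor
            with contextlib.suppress(AssertionError):
                torch.distributed.destroy_process_group()
        del model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
    except ImportError:
        # torch is unavailable; just drop the local reference.
        del model

Deferring the torch import to call time keeps utils importable on machines without a GPU stack, which is presumably why the original function caught ImportError rather than importing torch at module scope.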