Commit 25efe76

davidberenstein1957 authored and begumcig committed
fix: update model card tags to include 'pruna-ai' for improved categorization (#334)
1 parent 13088b3 commit 25efe76

4 files changed: 48 additions, 13 deletions

src/pruna/engine/save.py

Lines changed: 1 addition & 1 deletion
@@ -170,7 +170,7 @@ def save_pruna_model_to_hub(
     template_path = Path(__file__).parent / "hf_hub_utils" / "model_card_template.md"
     # Get the pruna library version from initalized module as OSS or paid so we can use the same method for both
     pruna_library = instance.__module__.split(".")[0] if "." in instance.__module__ else None
-    model_card_data["tags"] = [f"{pruna_library}-ai", "safetensors"]
+    model_card_data["tags"] = list({f"{pruna_library}-ai", "safetensors", "pruna-ai"})
     # Build the template parameters dictionary for clarity and maintainability
     template_params: dict = {
         "repo_id": repo_id,

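Why the set literal: building the tags from a set and converting back to a list deduplicates the extra "pruna-ai" entry when the model comes from the OSS package, whose derived tag is already "pruna-ai". A minimal sketch of that behaviour (the value of pruna_library below is an assumed example, not taken from the commit):

# Assumed OSS case: instance.__module__ starts with "pruna", so the derived tag
# duplicates the hard-coded "pruna-ai" and the set collapses it to a single entry.
pruna_library = "pruna"
tags = list({f"{pruna_library}-ai", "safetensors", "pruna-ai"})
assert sorted(tags) == ["pruna-ai", "safetensors"]
# For a package with a different module name, the same expression simply adds
# "pruna-ai" alongside the package-specific tag.
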
src/pruna/evaluation/evaluation_agent.py

Lines changed: 2 additions & 6 deletions
@@ -108,7 +108,6 @@ def evaluate(self, model: Any) -> List[MetricResult]:
         pruna_logger.info("Evaluating isolated inference metrics.")
         results.extend(self.compute_stateless_metrics(model, stateless_metrics))
 
-        model.move_to_device("cpu")
         safe_memory_cleanup()
         if self.evaluation_for_first_model:
             self.first_model_results = results
@@ -154,7 +153,8 @@ def prepare_model(self, model: Any) -> PrunaModel:
             pruna_logger.info("Evaluating a base model.")
             is_base = True
 
-        model.inference_handler.log_model_info()
+        if hasattr(model, "inference_handler"):  # Distributers do not have an inference handler
+            model.inference_handler.log_model_info()
         if (
             "batch_size" in self.task.datamodule.dataloader_args
             and self.task.datamodule.dataloader_args["batch_size"] != model.smash_config.batch_size
@@ -169,9 +169,6 @@ def prepare_model(self, model: Any) -> PrunaModel:
                 model.smash_config.batch_size,
             )
 
-        # ensure the model is on the cpu
-        model.move_to_device("cpu")
-
         return model
 
     def update_stateful_metrics(
@@ -199,7 +196,6 @@ def update_stateful_metrics(
         if not single_stateful_metrics and not pairwise_metrics:
            return
 
-        model.move_to_device(self.device)
        for batch_idx, batch in enumerate(self.task.dataloader):
            processed_outputs = model.run_inference(batch, self.device)
 
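The removed move_to_device("cpu") calls leave device placement to the inference code, and the new hasattr guard lets prepare_model accept wrappers that expose no inference_handler. A minimal sketch of the guard pattern, using a hypothetical stand-in class rather than the real distributed wrapper:

# Hypothetical stand-in for a distributed wrapper without an inference handler.
class DistributedStub:
    pass

def log_model_info_if_available(model) -> None:
    # Mirrors the guarded call above: only objects exposing an inference_handler
    # are asked to log model info; others are skipped instead of raising AttributeError.
    if hasattr(model, "inference_handler"):
        model.inference_handler.log_model_info()

log_model_info_if_available(DistributedStub())  # no-op, no AttributeError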

src/pruna/evaluation/metrics/metric_elapsed_time.py

Lines changed: 1 addition & 5 deletions
@@ -165,11 +165,7 @@ def compute(self, model: PrunaModel, dataloader: DataLoader) -> Dict[str, Any] |
             model,
             dataloader,
             self.n_warmup_iterations,
-            lambda m, x: (
-                m(**x, **m.inference_handler.model_args)  # x is a dict
-                if isinstance(x, dict)
-                else m(x, **m.inference_handler.model_args)  # x is tensor/list
-            ),
+            lambda m, x: (m.run_inference(x)),
         )
 
         # Measurement
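The warmup callable now defers to model.run_inference(x) instead of re-implementing the dict-versus-tensor branch inline. A toy sketch of that dispatch (ToyModel is illustrative only, not the PrunaModel API; it just mirrors what the removed isinstance branch did):

# Toy model whose run_inference unpacks dict batches as keyword arguments and
# passes anything else positionally, like the removed inline lambda did.
class ToyModel:
    def __call__(self, *args, **kwargs):
        return ("called", args, kwargs)

    def run_inference(self, batch):
        if isinstance(batch, dict):
            return self(**batch)
        return self(batch)

warmup = lambda m, x: m.run_inference(x)  # the new, model-agnostic warmup callable
model = ToyModel()
print(warmup(model, {"input_ids": [1, 2, 3]}))  # dict batch -> keyword arguments
print(warmup(model, [1, 2, 3]))                 # list/tensor batch -> positional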

tests/common.py

Lines changed: 44 additions & 1 deletion
@@ -1,6 +1,8 @@
+import ast
 import importlib.util
 import inspect
 import subprocess
+import textwrap
 from pathlib import Path
 from typing import Any, Callable
 
@@ -85,7 +87,11 @@ def run_full_integration(
     smashed_model = algorithm_tester.execute_smash(model, smash_config)
     algorithm_tester.execute_save(smashed_model)
     safe_memory_cleanup()
-    reloaded_model = algorithm_tester.execute_load()
+    reloaded_model = (
+        smashed_model
+        if is_function_unimplemented(algorithm_tester.execute_load)
+        else algorithm_tester.execute_load()
+    )  # noqa: E501
     if device != "accelerate" and not skip_evaluation:
         algorithm_tester.execute_evaluation(reloaded_model, smash_config.data, smash_config["device"])
     if hasattr(reloaded_model, "destroy"):
@@ -296,3 +302,40 @@ def extract_code_blocks_from_node(node: Any, section_name: str) -> None:
         extract_code_blocks_from_node(sec, section_title)
 
     print(f"Code blocks extracted and written to {output_dir}")
+
+
+def is_function_unimplemented(func):
+    """Check if a function is unimplemented."""
+    source = inspect.getsource(func)
+    source = textwrap.dedent(source)
+    tree = ast.parse(source)
+
+    func_def = tree.body[0]
+    if not isinstance(func_def, (ast.FunctionDef, ast.AsyncFunctionDef)):
+        return False
+
+    # remove docstring if present
+    body = func_def.body
+    if (
+        body
+        and isinstance(body[0], ast.Expr)
+        and isinstance(body[0].value, ast.Constant)
+        and isinstance(body[0].value.value, str)
+    ):
+        body = body[1:]
+
+    if len(body) == 1:
+        stmt = body[0]
+        if isinstance(stmt, ast.Pass):  # pass is not implemented
+            return True
+        # ... is not implemented
+        if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Constant) and stmt.value.value == Ellipsis:
+            return True
+        if (
+            isinstance(stmt, ast.Raise)
+            and isinstance(stmt.exc, ast.Call)
+            and getattr(stmt.exc.func, "id", "") == "NotImplementedError"
+        ):  # noqa: E501
+            return True
+
+    return False
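A usage sketch for the new helper (the import path is assumed; the expected results follow directly from the AST checks above):

from tests.common import is_function_unimplemented  # assumed import path

def stub_pass():
    """Placeholder that only passes."""
    pass

def stub_ellipsis(): ...

def stub_raises():
    raise NotImplementedError()  # the bare `raise NotImplementedError` form would not match the ast.Call check

def real_load():
    return "model"

assert is_function_unimplemented(stub_pass)
assert is_function_unimplemented(stub_ellipsis)
assert is_function_unimplemented(stub_raises)
assert not is_function_unimplemented(real_load)
# Note: inspect.getsource needs the function's source file to be available, so this
# works for module-level functions but not for code defined interactively.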
