
Commit c3fb81e

update with viptr
1 parent: 9f6bc7c

File tree

4 files changed: 25 additions, 8 deletions

.pre-commit-config.yaml
doctr/models/classification/vip/layers/pytorch.py
doctr/models/classification/vip/pytorch.py
doctr/models/recognition/viptr/pytorch.py


.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@ repos:
      - id: no-commit-to-branch
        args: ['--branch', 'main']
  - repo: https://github.com/astral-sh/ruff-pre-commit
-   rev: v0.11.1
+   rev: v0.11.5
    hooks:
      - id: ruff
        args: [ --fix ]

doctr/models/classification/vip/layers/pytorch.py

Lines changed: 1 addition & 1 deletion

@@ -246,7 +246,7 @@ def __init__(
                *conv_sequence_pt(dim, dim, kernel_size=1, groups=dim, bias=False, bn=True, relu=False),
            )
        else:
-           self.sr = nn.Identity() # type: ignore[assignment]
+           self.sr = nn.Identity()  # type: ignore[assignment]

        self.local_conv = nn.Conv2d(dim, dim, kernel_size=3, padding=1, groups=dim)

doctr/models/classification/vip/pytorch.py

Lines changed: 10 additions & 1 deletion

@@ -230,6 +230,15 @@ def _init_weights(self, m):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+

def vip_tiny(pretrained: bool = False, **kwargs: Any) -> VIPNet:
    """

@@ -322,7 +331,7 @@ def _vip(
    # The number of classes is not the same as the number of classes in the pretrained model =>
    # remove the last layer weights
    _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
-   load_pretrained_params(model, default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+   model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
    return model
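
Beyond the internal refactor in _vip, the new from_pretrained method gives callers a one-liner for loading a checkpoint onto an instantiated classification model without importing doctr.models.utils.load_pretrained_params themselves. A minimal sketch, assuming vip_tiny is re-exported from doctr.models.classification like the other classification backbones, with a placeholder checkpoint path:

from doctr.models.classification import vip_tiny

# Build the architecture without fetching the default pretrained weights
model = vip_tiny(pretrained=False)

# New in this commit: load a local or remote checkpoint directly on the model;
# extra kwargs are forwarded to doctr.models.utils.load_pretrained_params
model.from_pretrained("vip_tiny_checkpoint.pt")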

doctr/models/recognition/viptr/pytorch.py

Lines changed: 13 additions & 5 deletions

@@ -91,9 +91,9 @@ def __call__(self, logits: torch.Tensor) -> list[tuple[str, float]]:


class VIPTR(RecognitionModel, nn.Module):
-   """Implements a VIPTR architecture as described in `"A Vision Permutable Extractor for Fast and Efficient
+   """Implements a VIPTR architecture as described in `"A Vision Permutable Extractor for Fast and Efficient
    Scene Text Recognition" <https://arxiv.org/abs/2401.10110>`_.
-
+
    Args:
        feature_extractor: the backbone serving as feature extractor
        vocab: vocabulary used for encoding

@@ -110,7 +110,6 @@ def __init__(
        exportable: bool = False,
        cfg: dict[str, Any] | None = None,
    ):
-
        super().__init__()
        self.vocab = vocab
        self.exportable = exportable

@@ -134,6 +133,15 @@ def __init__(
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
    def forward(
        self,
        x: torch.Tensor,

@@ -230,7 +238,7 @@ def _viptr(

    # Feature extractor
    feat_extractor = IntermediateLayerGetter(
-       backbone_fn(pretrained_backbone, input_shape=_cfg["input_shape"]), # type: ignore[call-arg]
+       backbone_fn(pretrained_backbone, input_shape=_cfg["input_shape"]),  # type: ignore[call-arg]
        {layer: "features"},
    )

@@ -244,7 +252,7 @@ def _viptr(
    # The number of classes is not the same as the number of classes in the pretrained model =>
    # remove the last layer weights
    _ignore_keys = ignore_keys if _cfg["vocab"] != default_cfgs[arch]["vocab"] else None
-   load_pretrained_params(model, default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+   model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)

    return model
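
The recognition model gets the same method, and _viptr now routes the default checkpoint through it. For user code the pattern is sketched below; viptr_tiny is a hypothetical zoo constructor name (the public entry point is not shown in this diff), and the checkpoint path and ignore_keys entries are placeholders:

from doctr.models import recognition

# Hypothetical constructor name -- substitute the actual VIPTR zoo function
model = recognition.viptr_tiny(pretrained=False)

# Load fine-tuned weights through the new method. When the target vocab differs
# from the pretrained one, pass ignore_keys to drop the head weights, mirroring
# what _viptr does above (the key names here are purely illustrative).
model.from_pretrained(
    "viptr_finetuned.pt",
    ignore_keys=["head.weight", "head.bias"],
)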
