From d00c087539600aded6f2bb82598fd2228f2ce961 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Thu, 9 Jan 2025 19:34:40 +0100 Subject: [PATCH 01/23] Add files via upload --- torchgeo/trainers/instancesegmentation.py | 174 ++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 torchgeo/trainers/instancesegmentation.py diff --git a/torchgeo/trainers/instancesegmentation.py b/torchgeo/trainers/instancesegmentation.py new file mode 100644 index 00000000000..9be8f819330 --- /dev/null +++ b/torchgeo/trainers/instancesegmentation.py @@ -0,0 +1,174 @@ +from typing import Any # Allows us to annotate arguments and return types of functions +import torch.nn as nn # PyTorch module for neural network layers +import torch # PyTorch for deep learning operations +from torch import Tensor # Type hint for tensors +from torchmetrics.detection.mean_ap import MeanAveragePrecision # Metric to evaluate instance segmentation models +from torchvision.models.detection import maskrcnn_resnet50_fpn # Pre-built Mask R-CNN model from PyTorch +from ultralytics import YOLO +from .base import BaseTask + +class InstanceSegmentationTask(BaseTask): + """ + Task class for training and evaluating instance segmentation models. + + This class supports Mask R-CNN and YOLO models and handles the following: + - Model configuration + - Loss computation + - Metric computation (e.g., Mean Average Precision) + - Training, validation, testing, and prediction steps + """ + + def __init__( + self, + model: str = 'mask_rcnn', # Model type, e.g., 'mask_rcnn' or 'yolo' + backbone: str = 'resnet50', # Backbone type for Mask R-CNN (ignored for YOLO) + weights: str | bool | None = None, # Pretrained weights or custom checkpoint path + num_classes: int = 2, # Number of classes, including background + lr: float = 1e-3, # Learning rate for the optimizer + patience: int = 10, # Patience for the learning rate scheduler + freeze_backbone: bool = False, # Whether to freeze backbone layers (useful for transfer learning) + ) -> None: + """ + Constructor for the InstanceSegmentationTask. + + Initializes the hyperparameters, sets up the model and metrics. + """ + self.weights = weights # Save weights for model initialization + super().__init__() # Initialize the BaseTask class (inherits common functionality) + self.save_hyperparameters() # Save input arguments for later use (e.g., in checkpoints or logs) + self.model = None # Placeholder for the model (to be initialized later) + self.validation_outputs = [] # List to store outputs during validation (used for debugging or analysis) + self.test_outputs = [] # List to store outputs during testing + self.configure_models() # Call method to set up the model + self.configure_metrics() # Call method to set up metrics + + def configure_models(self) -> None: + """ + Set up the instance segmentation model based on the specified type (Mask R-CNN or YOLO). 
+ + Configures: + - Backbone (for Mask R-CNN) + - Classifier and mask heads + - Pretrained weights + """ + model = self.hparams['model'].lower() # Read the model type from hyperparameters (convert to lowercase) + num_classes = self.hparams['num_classes'] # Number of output classes + + if model == 'mask_rcnn': + # Load the Mask R-CNN model with a ResNet50 backbone + self.model = maskrcnn_resnet50_fpn(pretrained=self.weights is True) + + # Update the classification head to predict `num_classes` + in_features = self.model.roi_heads.box_predictor.cls_score.in_features + self.model.roi_heads.box_predictor = nn.Linear(in_features, num_classes) + + # Update the mask head for instance segmentation + in_features_mask = self.model.roi_heads.mask_predictor.conv5_mask.in_channels + self.model.roi_heads.mask_predictor = nn.ConvTranspose2d( + in_features_mask, num_classes, kernel_size=2, stride=2 + ) + + elif model == 'yolo': + # Initialize YOLOv8 for instance segmentation + self.model = YOLO('yolov8n-seg') # Load a small YOLOv8 segmentation model + self.model.model.args['nc'] = num_classes # Set the number of classes in YOLO + if self.weights: + # If weights are provided, load the custom checkpoint + self.model = YOLO(self.weights) + + else: + raise ValueError( + f"Invalid model type '{model}'. Supported models: 'mask_rcnn', 'yolo'." + ) + + # Freeze the backbone if specified (useful for transfer learning) + if self.hparams['freeze_backbone'] and model == 'mask_rcnn': + for param in self.model.backbone.parameters(): + param.requires_grad = False # Prevent these layers from being updated during training + + def configure_metrics(self) -> None: + """ + Set up metrics for evaluating instance segmentation models. + + - Uses Mean Average Precision (mAP) for masks (IOU-based metric). + """ + self.metrics = MeanAveragePrecision(iou_type="segm") # Track segmentation-specific mAP + + def training_step(self, batch: Any, batch_idx: int) -> Tensor: + """ + Perform a single training step. + + Args: + batch: A batch of data from the DataLoader. Includes images and ground truth targets. + batch_idx: Index of the current batch. + + Returns: + The total loss for the batch. + """ + images, targets = batch['image'], batch['target'] # Unpack images and targets + loss_dict = self.model(images, targets) # Compute losses (classification, box regression, mask loss, etc.) + loss = sum(loss for loss in loss_dict.values()) # Combine all losses into a single value + self.log('train_loss', loss, batch_size=len(images)) # Log the training loss for monitoring + return loss # Return the loss for optimization + + def validation_step(self, batch: Any, batch_idx: int) -> None: + """ + Perform a single validation step. + + Args: + batch: A batch of data from the DataLoader. Includes images and targets. + batch_idx: Index of the current batch. + + Updates metrics and stores predictions/targets for further analysis. + """ + images, targets = batch['image'], batch['target'] # Unpack images and targets + outputs = self.model(images) # Run inference on the model + self.metrics.update(outputs, targets) # Update mAP metrics with predictions and ground truths + self.validation_outputs.append((outputs, targets)) # Store outputs for debugging or visualization + + def on_validation_epoch_end(self) -> None: + """ + At the end of the validation epoch, compute and log metrics. + + Resets the stored outputs to free memory. 
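+
+        Note: ``MeanAveragePrecision.compute()`` aggregates every batch seen
+        since the last ``reset()``, so resetting here keeps epochs independent.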
+ """ + metrics_dict = self.metrics.compute() # Calculate final mAP and other metrics + self.log_dict(metrics_dict) # Log all computed metrics + self.metrics.reset() # Reset metrics for the next epoch + self.validation_outputs.clear() # Clear stored outputs to free memory + + def test_step(self, batch: Any, batch_idx: int) -> None: + """ + Perform a single test step. + + Similar to validation but used for test data. + """ + images, targets = batch['image'], batch['target'] + outputs = self.model(images) + self.metrics.update(outputs, targets) + self.test_outputs.append((outputs, targets)) + + def on_test_epoch_end(self) -> None: + """ + At the end of the test epoch, compute and log metrics. + + Resets the stored outputs to free memory. + """ + metrics_dict = self.metrics.compute() + self.log_dict(metrics_dict) + self.metrics.reset() + self.test_outputs.clear() + + def predict_step(self, batch: Any, batch_idx: int) -> Tensor: + """ + Perform inference on a batch of images. + + Args: + batch: A batch of images. + + Returns: + Predicted masks and bounding boxes for the batch. + """ + images = batch['image'] # Extract images from the batch + predictions = self.model(images) # Run inference on the model + return predictions # Return the predictions From 52daa1ce80d13c10f56edb0e76a5c01d44b18892 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Thu, 9 Jan 2025 19:36:20 +0100 Subject: [PATCH 02/23] Add files via upload --- tests/trainers/test_instancesegmentation.py | 48 +++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 tests/trainers/test_instancesegmentation.py diff --git a/tests/trainers/test_instancesegmentation.py b/tests/trainers/test_instancesegmentation.py new file mode 100644 index 00000000000..7c1822111c5 --- /dev/null +++ b/tests/trainers/test_instancesegmentation.py @@ -0,0 +1,48 @@ +import torch +import pytorch_lightning as pl +from pytorch_lightning import LightningModule +from torch.utils.data import DataLoader +from torchgeo.datasets import VHR10 +from torchgeo.trainers import InstanceSegmentationTask + + +# Custom collate function for DataLoader (required for Mask R-CNN models) +def collate_fn(batch): + return tuple(zip(*batch)) + +# Initialize the VHR10 dataset +train_dataset = VHR10(root="data", split="positive", transforms=None, download=True) +val_dataset = VHR10(root="data", split="positive", transforms=None) + +# Create DataLoaders +train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn) +val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn) + +# Initialize the InstanceSegmentationTask +task = InstanceSegmentationTask( + model="mask_rcnn", # Use Mask R-CNN as the model + backbone="resnet50", # ResNet-50 as the backbone + weights=True, # Use pretrained weights + num_classes=11, # 10 object classes in VHR10 + 1 background class + lr=1e-3, # Learning rate + freeze_backbone=False # Allow training the backbone +) + +# Set up PyTorch Lightning Trainer +trainer = pl.Trainer( + max_epochs=10, + accelerator="gpu" if torch.cuda.is_available() else "cpu", + devices=1 +) + +# Train the model +trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader) + +# Evaluate the model +trainer.test(task, dataloaders=val_loader) + +# Example inference +test_sample = train_dataset[0] +test_image = test_sample["image"].unsqueeze(0) # Add batch dimension +predictions = task.predict_step({"image": test_image}, batch_idx=0) +print(predictions) From 68756a79a5583e02dd0137d90153a7d24380edd9 
Mon Sep 17 00:00:00 2001
From: Arianna Sole
Date: Thu, 9 Jan 2025 19:43:43 +0100
Subject: [PATCH 03/23] Update instancesegmentation.py

---
 torchgeo/trainers/instancesegmentation.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/torchgeo/trainers/instancesegmentation.py b/torchgeo/trainers/instancesegmentation.py
index 9be8f819330..c07a9b38e2d 100644
--- a/torchgeo/trainers/instancesegmentation.py
+++ b/torchgeo/trainers/instancesegmentation.py
@@ -1,9 +1,9 @@
-from typing import Any # Allows us to annotate arguments and return types of functions
-import torch.nn as nn # PyTorch module for neural network layers
-import torch # PyTorch for deep learning operations
-from torch import Tensor # Type hint for tensors
-from torchmetrics.detection.mean_ap import MeanAveragePrecision # Metric to evaluate instance segmentation models
-from torchvision.models.detection import maskrcnn_resnet50_fpn # Pre-built Mask R-CNN model from PyTorch
+from typing import Any
+import torch.nn as nn
+import torch
+from torch import Tensor
+from torchmetrics.detection.mean_ap import MeanAveragePrecision
+from torchvision.models.detection import maskrcnn_resnet50_fpn
 from ultralytics import YOLO
 from .base import BaseTask

From 7676ac362114e4b2e88cece49f9e9800ba6f8964 Mon Sep 17 00:00:00 2001
From: Arianna Sole
Date: Mon, 20 Jan 2025 12:30:27 +0100
Subject: [PATCH 04/23] Update and rename instancesegmentation.py to instance_segmentation.py

---
 torchgeo/trainers/instance_segmentation.py | 246 +++++++++++++++++++++
 torchgeo/trainers/instancesegmentation.py  | 174 ---------------
 2 files changed, 246 insertions(+), 174 deletions(-)
 create mode 100644 torchgeo/trainers/instance_segmentation.py
 delete mode 100644 torchgeo/trainers/instancesegmentation.py

diff --git a/torchgeo/trainers/instance_segmentation.py b/torchgeo/trainers/instance_segmentation.py
new file mode 100644
index 00000000000..d28f47f610d
--- /dev/null
+++ b/torchgeo/trainers/instance_segmentation.py
@@ -0,0 +1,246 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""Trainers for instance segmentation."""
+
+from typing import Any
+import torch.nn as nn
+import torch
+from torch import Tensor
+from torchmetrics.detection.mean_ap import MeanAveragePrecision
+from torchmetrics import MetricCollection
+from torchvision.models.detection import maskrcnn_resnet50_fpn
+from base import BaseTask
+
+import matplotlib.pyplot as plt
+from matplotlib.figure import Figure
+from torchgeo.datasets import RGBBandsMissingError, unbind_samples
+
+# for testing
+import pytorch_lightning as pl
+from pytorch_lightning import LightningModule
+from torch.utils.data import DataLoader
+from torchgeo.datasets import VHR10
+
+class InstanceSegmentationTask(BaseTask):
+    """Instance Segmentation."""
+
+    def __init__(
+        self,
+        model: str = 'mask_rcnn',
+        backbone: str = 'resnet50',
+        weights: str | bool | None = None,
+        num_classes: int = 2,
+        lr: float = 1e-3,
+        patience: int = 10,
+        freeze_backbone: bool = False,
+    ) -> None:
+        """Initialize a new InstanceSegmentationTask instance.
+
+        Args:
+            model: Name of the model to use.
+            backbone: Name of the backbone to use.
+            weights: Initial model weights. Either a weight enum, the string
+                representation of a weight enum, True for ImageNet weights, False or
+                None for random weights, or the path to a saved model state dict.
+            in_channels: Number of input channels to model.
+            num_classes: Number of prediction classes (including the background).
+ lr: Learning rate for optimizer. + patience: Patience for learning rate scheduler. + freeze_backbone: Freeze the backbone network to fine-tune the + decoder and segmentation head. + + .. versionadded:: 0.7 + """ + self.weights = weights + super().__init__() + # self.save_hyperparameters() + # self.model = None + # self.validation_outputs = [] + # self.test_outputs = [] + # self.configure_models() + # self.configure_metrics() + + def configure_models(self) -> None: + """Initialize the model. + + Raises: + ValueError: If *model* is invalid. + """ + model = self.hparams['model'].lower() + num_classes = self.hparams['num_classes'] + + if model == 'mask_rcnn': + # Load the Mask R-CNN model with a ResNet50 backbone + self.model = maskrcnn_resnet50_fpn(weights=self.weights is True) + + # Update the classification head to predict `num_classes` + in_features = self.model.roi_heads.box_predictor.cls_score.in_features + self.model.roi_heads.box_predictor = nn.Linear(in_features, num_classes) + + # Update the mask head for instance segmentation + in_features_mask = self.model.roi_heads.mask_predictor.conv5_mask.in_channels + self.model.roi_heads.mask_predictor = nn.ConvTranspose2d( + in_features_mask, num_classes, kernel_size=2, stride=2 + ) + + else: + raise ValueError( + f"Invalid model type '{model}'. Supported model: 'mask_rcnn'" + ) + + # Freeze backbone + if self.hparams['freeze_backbone']: + for param in self.model.backbone.parameters(): + param.requires_grad = False + + + def configure_metrics(self) -> None: + """Initialize the performance metrics. + + - Uses Mean Average Precision (mAP) for masks (IOU-based metric). + """ + self.metrics = MetricCollection([MeanAveragePrecision(iou_type="segm")]) + self.train_metrics = self.metrics.clone(prefix='train_') + self.val_metrics = self.metrics.clone(prefix='val_') + self.test_metrics = self.metrics.clone(prefix='test_') + + def training_step(self, batch: Any, batch_idx: int) -> Tensor: + """Compute the training loss. + + Args: + batch: A batch of data from the DataLoader. Includes images and ground truth targets. + batch_idx: Index of the current batch. + + Returns: + The total loss for the batch. + """ + images, targets = batch['image'], batch['target'] + loss_dict = self.model(images, targets) + loss = sum(loss for loss in loss_dict.values()) + self.log('train_loss', loss, batch_size=len(images)) + return loss + + def validation_step(self, batch: Any, batch_idx: int) -> None: + """Compute the validation loss. + + Args: + batch: A batch of data from the DataLoader. Includes images and targets. + batch_idx: Index of the current batch. + + Updates metrics and stores predictions/targets for further analysis. 
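+
+        Note: torchvision detection models return a loss dict only in train
+        mode; in eval mode ``self.model(images)`` returns per-image prediction
+        dicts, which is the format the metric update expects.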
+ """ + images, targets = batch['image'], batch['target'] + outputs = self.model(images) + self.metrics.update(outputs, targets) + self.validation_outputs.append((outputs, targets)) + + metrics_dict = self.metrics.compute() + self.log_dict(metrics_dict) + self.metrics.reset() + + # check + if ( + batch_idx < 10 + and hasattr(self.trainer, 'datamodule') + and hasattr(self.trainer.datamodule, 'plot') + and self.logger + and hasattr(self.logger, 'experiment') + and hasattr(self.logger.experiment, 'add_figure') + ): + datamodule = self.trainer.datamodule + + batch['prediction_masks'] = [output['masks'].cpu() for output in outputs] + batch['image'] = batch['image'].cpu() + + sample = unbind_samples(batch)[0] + + fig: Figure | None = None + try: + fig = datamodule.plot(sample) + except RGBBandsMissingError: + pass + + if fig: + summary_writer = self.logger.experiment + summary_writer.add_figure( + f'image/{batch_idx}', fig, global_step=self.global_step + ) + plt.close() + + + def test_step(self, batch: Any, batch_idx: int) -> None: + """Compute the test loss and additional metrics.""" + + images, targets = batch['image'], batch['target'] + outputs = self.model(images) + self.metrics.update(outputs, targets) + self.test_outputs.append((outputs, targets)) + + metrics_dict = self.metrics.compute() + self.log_dict(metrics_dict) + + + def predict_step(self, batch: Any, batch_idx: int) -> Tensor: + """Perform inference on a batch of images. + + Args: + batch: A batch of images. + + Returns: + Predicted masks and bounding boxes for the batch. + """ + images = batch['image'] + y_hat: Tensor = self.model(images) + return y_hat + + + + + + + + + + + + + + + +#================================================================= +# TESTING +#================================================================= + +def collate_fn(batch): + return tuple(zip(*batch)) + +train_dataset = VHR10(root="data", split="positive", transforms=None, download=True) +val_dataset = VHR10(root="data", split="positive", transforms=None) + +train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn) +val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn) + +task = InstanceSegmentationTask( + model="mask_rcnn", + backbone="resnet50", + weights=True, + num_classes=11, + lr=1e-3, + freeze_backbone=False +) + +trainer = pl.Trainer( + max_epochs=10, + accelerator="gpu" if torch.cuda.is_available() else "cpu", + devices=1 +) + +trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader) + +trainer.test(task, dataloaders=val_loader) + +test_sample = train_dataset[0] +test_image = test_sample["image"].unsqueeze(0) +predictions = task.predict_step({"image": test_image}, batch_idx=0) +print(predictions) diff --git a/torchgeo/trainers/instancesegmentation.py b/torchgeo/trainers/instancesegmentation.py deleted file mode 100644 index c07a9b38e2d..00000000000 --- a/torchgeo/trainers/instancesegmentation.py +++ /dev/null @@ -1,174 +0,0 @@ -from typing import Any -import torch.nn as nn -import torch -from torch import Tensor -from torchmetrics.detection.mean_ap import MeanAveragePrecision -from torchvision.models.detection import maskrcnn_resnet50_fpn -from ultralytics import YOLO -from .base import BaseTask - -class InstanceSegmentationTask(BaseTask): - """ - Task class for training and evaluating instance segmentation models. 
- - This class supports Mask R-CNN and YOLO models and handles the following: - - Model configuration - - Loss computation - - Metric computation (e.g., Mean Average Precision) - - Training, validation, testing, and prediction steps - """ - - def __init__( - self, - model: str = 'mask_rcnn', # Model type, e.g., 'mask_rcnn' or 'yolo' - backbone: str = 'resnet50', # Backbone type for Mask R-CNN (ignored for YOLO) - weights: str | bool | None = None, # Pretrained weights or custom checkpoint path - num_classes: int = 2, # Number of classes, including background - lr: float = 1e-3, # Learning rate for the optimizer - patience: int = 10, # Patience for the learning rate scheduler - freeze_backbone: bool = False, # Whether to freeze backbone layers (useful for transfer learning) - ) -> None: - """ - Constructor for the InstanceSegmentationTask. - - Initializes the hyperparameters, sets up the model and metrics. - """ - self.weights = weights # Save weights for model initialization - super().__init__() # Initialize the BaseTask class (inherits common functionality) - self.save_hyperparameters() # Save input arguments for later use (e.g., in checkpoints or logs) - self.model = None # Placeholder for the model (to be initialized later) - self.validation_outputs = [] # List to store outputs during validation (used for debugging or analysis) - self.test_outputs = [] # List to store outputs during testing - self.configure_models() # Call method to set up the model - self.configure_metrics() # Call method to set up metrics - - def configure_models(self) -> None: - """ - Set up the instance segmentation model based on the specified type (Mask R-CNN or YOLO). - - Configures: - - Backbone (for Mask R-CNN) - - Classifier and mask heads - - Pretrained weights - """ - model = self.hparams['model'].lower() # Read the model type from hyperparameters (convert to lowercase) - num_classes = self.hparams['num_classes'] # Number of output classes - - if model == 'mask_rcnn': - # Load the Mask R-CNN model with a ResNet50 backbone - self.model = maskrcnn_resnet50_fpn(pretrained=self.weights is True) - - # Update the classification head to predict `num_classes` - in_features = self.model.roi_heads.box_predictor.cls_score.in_features - self.model.roi_heads.box_predictor = nn.Linear(in_features, num_classes) - - # Update the mask head for instance segmentation - in_features_mask = self.model.roi_heads.mask_predictor.conv5_mask.in_channels - self.model.roi_heads.mask_predictor = nn.ConvTranspose2d( - in_features_mask, num_classes, kernel_size=2, stride=2 - ) - - elif model == 'yolo': - # Initialize YOLOv8 for instance segmentation - self.model = YOLO('yolov8n-seg') # Load a small YOLOv8 segmentation model - self.model.model.args['nc'] = num_classes # Set the number of classes in YOLO - if self.weights: - # If weights are provided, load the custom checkpoint - self.model = YOLO(self.weights) - - else: - raise ValueError( - f"Invalid model type '{model}'. Supported models: 'mask_rcnn', 'yolo'." - ) - - # Freeze the backbone if specified (useful for transfer learning) - if self.hparams['freeze_backbone'] and model == 'mask_rcnn': - for param in self.model.backbone.parameters(): - param.requires_grad = False # Prevent these layers from being updated during training - - def configure_metrics(self) -> None: - """ - Set up metrics for evaluating instance segmentation models. - - - Uses Mean Average Precision (mAP) for masks (IOU-based metric). 
- """ - self.metrics = MeanAveragePrecision(iou_type="segm") # Track segmentation-specific mAP - - def training_step(self, batch: Any, batch_idx: int) -> Tensor: - """ - Perform a single training step. - - Args: - batch: A batch of data from the DataLoader. Includes images and ground truth targets. - batch_idx: Index of the current batch. - - Returns: - The total loss for the batch. - """ - images, targets = batch['image'], batch['target'] # Unpack images and targets - loss_dict = self.model(images, targets) # Compute losses (classification, box regression, mask loss, etc.) - loss = sum(loss for loss in loss_dict.values()) # Combine all losses into a single value - self.log('train_loss', loss, batch_size=len(images)) # Log the training loss for monitoring - return loss # Return the loss for optimization - - def validation_step(self, batch: Any, batch_idx: int) -> None: - """ - Perform a single validation step. - - Args: - batch: A batch of data from the DataLoader. Includes images and targets. - batch_idx: Index of the current batch. - - Updates metrics and stores predictions/targets for further analysis. - """ - images, targets = batch['image'], batch['target'] # Unpack images and targets - outputs = self.model(images) # Run inference on the model - self.metrics.update(outputs, targets) # Update mAP metrics with predictions and ground truths - self.validation_outputs.append((outputs, targets)) # Store outputs for debugging or visualization - - def on_validation_epoch_end(self) -> None: - """ - At the end of the validation epoch, compute and log metrics. - - Resets the stored outputs to free memory. - """ - metrics_dict = self.metrics.compute() # Calculate final mAP and other metrics - self.log_dict(metrics_dict) # Log all computed metrics - self.metrics.reset() # Reset metrics for the next epoch - self.validation_outputs.clear() # Clear stored outputs to free memory - - def test_step(self, batch: Any, batch_idx: int) -> None: - """ - Perform a single test step. - - Similar to validation but used for test data. - """ - images, targets = batch['image'], batch['target'] - outputs = self.model(images) - self.metrics.update(outputs, targets) - self.test_outputs.append((outputs, targets)) - - def on_test_epoch_end(self) -> None: - """ - At the end of the test epoch, compute and log metrics. - - Resets the stored outputs to free memory. - """ - metrics_dict = self.metrics.compute() - self.log_dict(metrics_dict) - self.metrics.reset() - self.test_outputs.clear() - - def predict_step(self, batch: Any, batch_idx: int) -> Tensor: - """ - Perform inference on a batch of images. - - Args: - batch: A batch of images. - - Returns: - Predicted masks and bounding boxes for the batch. 
- """ - images = batch['image'] # Extract images from the batch - predictions = self.model(images) # Run inference on the model - return predictions # Return the predictions From 0fa7b07e1c88ab70d0edd4745f13fd690b71daf3 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Tue, 21 Jan 2025 14:44:03 +0100 Subject: [PATCH 05/23] Update test_instancesegmentation.py --- tests/trainers/test_instancesegmentation.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/trainers/test_instancesegmentation.py b/tests/trainers/test_instancesegmentation.py index 7c1822111c5..d984c970e37 100644 --- a/tests/trainers/test_instancesegmentation.py +++ b/tests/trainers/test_instancesegmentation.py @@ -3,6 +3,8 @@ from pytorch_lightning import LightningModule from torch.utils.data import DataLoader from torchgeo.datasets import VHR10 +from torchgeo.main import main + from torchgeo.trainers import InstanceSegmentationTask @@ -20,12 +22,12 @@ def collate_fn(batch): # Initialize the InstanceSegmentationTask task = InstanceSegmentationTask( - model="mask_rcnn", # Use Mask R-CNN as the model - backbone="resnet50", # ResNet-50 as the backbone - weights=True, # Use pretrained weights - num_classes=11, # 10 object classes in VHR10 + 1 background class - lr=1e-3, # Learning rate - freeze_backbone=False # Allow training the backbone + model="mask_rcnn", + backbone="resnet50", + weights=True, + num_classes=11, + lr=1e-3, + freeze_backbone=False ) # Set up PyTorch Lightning Trainer From b4334f03dd47279958404be16fb45176152c62b8 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Tue, 21 Jan 2025 14:44:59 +0100 Subject: [PATCH 06/23] Update instance_segmentation.py --- torchgeo/trainers/instance_segmentation.py | 50 ---------------------- 1 file changed, 50 deletions(-) diff --git a/torchgeo/trainers/instance_segmentation.py b/torchgeo/trainers/instance_segmentation.py index d28f47f610d..820c61e8a9d 100644 --- a/torchgeo/trainers/instance_segmentation.py +++ b/torchgeo/trainers/instance_segmentation.py @@ -194,53 +194,3 @@ def predict_step(self, batch: Any, batch_idx: int) -> Tensor: y_hat: Tensor = self.model(images) return y_hat - - - - - - - - - - - - - - -#================================================================= -# TESTING -#================================================================= - -def collate_fn(batch): - return tuple(zip(*batch)) - -train_dataset = VHR10(root="data", split="positive", transforms=None, download=True) -val_dataset = VHR10(root="data", split="positive", transforms=None) - -train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn) -val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn) - -task = InstanceSegmentationTask( - model="mask_rcnn", - backbone="resnet50", - weights=True, - num_classes=11, - lr=1e-3, - freeze_backbone=False -) - -trainer = pl.Trainer( - max_epochs=10, - accelerator="gpu" if torch.cuda.is_available() else "cpu", - devices=1 -) - -trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader) - -trainer.test(task, dataloaders=val_loader) - -test_sample = train_dataset[0] -test_image = test_sample["image"].unsqueeze(0) -predictions = task.predict_step({"image": test_image}, batch_idx=0) -print(predictions) From a160baaa976467a9a2f43b9381fdb81345262ef4 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Tue, 21 Jan 2025 14:46:13 +0100 Subject: [PATCH 07/23] Update __init__.py --- torchgeo/trainers/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff 
--git a/torchgeo/trainers/__init__.py b/torchgeo/trainers/__init__.py index ee69bff0021..608ac21a00b 100644 --- a/torchgeo/trainers/__init__.py +++ b/torchgeo/trainers/__init__.py @@ -12,11 +12,13 @@ from .regression import PixelwiseRegressionTask, RegressionTask from .segmentation import SemanticSegmentationTask from .simclr import SimCLRTask +from .instance_segmentation import InstanceSegmentationTask __all__ = ( 'BYOLTask', 'BaseTask', 'ClassificationTask', + 'InstanceSegmentationTask' 'IOBenchTask', 'MoCoTask', 'MultiLabelClassificationTask', From fa8697b0120e5b570190a82ee438676a859dc820 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Tue, 21 Jan 2025 19:13:52 +0100 Subject: [PATCH 08/23] Update instance_segmentation.py --- torchgeo/trainers/instance_segmentation.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/torchgeo/trainers/instance_segmentation.py b/torchgeo/trainers/instance_segmentation.py index 820c61e8a9d..78524fa18fc 100644 --- a/torchgeo/trainers/instance_segmentation.py +++ b/torchgeo/trainers/instance_segmentation.py @@ -10,17 +10,12 @@ from torchmetrics.detection.mean_ap import MeanAveragePrecision from torchmetrics import MetricCollection from torchvision.models.detection import maskrcnn_resnet50_fpn -from base import BaseTask +from .base import BaseTask import matplotlib.pyplot as plt from matplotlib.figure import Figure -from torchgeo.datasets import RGBBandsMissingError, unbind_samples +from ..datasets import RGBBandsMissingError, unbind_samples -# for testing -import pytorch_lightning as pl -from pytorch_lightning import LightningModule -from torch.utils.data import DataLoader -from torchgeo.datasets import VHR10 class InstanceSegmentationTask(BaseTask): """Instance Segmentation.""" From f6ceed184ecc50a2af487ad3c2586e541dc8abfe Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Mon, 27 Jan 2025 14:24:22 +0100 Subject: [PATCH 09/23] Update instance_segmentation.py --- torchgeo/trainers/instance_segmentation.py | 138 +++++++++++++++++---- 1 file changed, 112 insertions(+), 26 deletions(-) diff --git a/torchgeo/trainers/instance_segmentation.py b/torchgeo/trainers/instance_segmentation.py index 78524fa18fc..3be15720fd5 100644 --- a/torchgeo/trainers/instance_segmentation.py +++ b/torchgeo/trainers/instance_segmentation.py @@ -9,14 +9,14 @@ from torch import Tensor from torchmetrics.detection.mean_ap import MeanAveragePrecision from torchmetrics import MetricCollection -from torchvision.models.detection import maskrcnn_resnet50_fpn +from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor from .base import BaseTask import matplotlib.pyplot as plt from matplotlib.figure import Figure from ..datasets import RGBBandsMissingError, unbind_samples - class InstanceSegmentationTask(BaseTask): """Instance Segmentation.""" @@ -25,7 +25,7 @@ def __init__( model: str = 'mask_rcnn', backbone: str = 'resnet50', weights: str | bool | None = None, - num_classes: int = 2, + num_classes: int = 2, lr: float = 1e-3, patience: int = 10, freeze_backbone: bool = False, @@ -49,12 +49,12 @@ def __init__( """ self.weights = weights super().__init__() - # self.save_hyperparameters() - # self.model = None - # self.validation_outputs = [] - # self.test_outputs = [] - # self.configure_models() - # self.configure_metrics() + self.save_hyperparameters() + self.model = None + self.validation_outputs = [] + self.test_outputs = [] + self.configure_models() + 
self.configure_metrics() def configure_models(self) -> None: """Initialize the model. @@ -67,11 +67,12 @@ def configure_models(self) -> None: if model == 'mask_rcnn': # Load the Mask R-CNN model with a ResNet50 backbone - self.model = maskrcnn_resnet50_fpn(weights=self.weights is True) + self.model = maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT) # Update the classification head to predict `num_classes` in_features = self.model.roi_heads.box_predictor.cls_score.in_features - self.model.roi_heads.box_predictor = nn.Linear(in_features, num_classes) + # self.model.roi_heads.box_predictor = nn.Linear(in_features, num_classes) + self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) # Update the mask head for instance segmentation in_features_mask = self.model.roi_heads.mask_predictor.conv5_mask.in_channels @@ -126,13 +127,50 @@ def validation_step(self, batch: Any, batch_idx: int) -> None: Updates metrics and stores predictions/targets for further analysis. """ images, targets = batch['image'], batch['target'] - outputs = self.model(images) - self.metrics.update(outputs, targets) - self.validation_outputs.append((outputs, targets)) - - metrics_dict = self.metrics.compute() - self.log_dict(metrics_dict) - self.metrics.reset() + batch_size = images.shape[0] + + outputs = self.model(images) + + for target in targets: + target["masks"] = (target["masks"] > 0).to(torch.uint8) + target["boxes"] = target["boxes"].to(torch.float32) + target["labels"] = target["labels"].to(torch.int64) + + # Compute the loss and predictions + loss_dict = self.model(images, targets) # list of dictionaries + + print('\nDEBUG TRAINING LOSS\n') + print(f"Training loss: {loss_dict}") + + # Post-process `loss_dict` to compute total loss + total_loss = 0.0 + for loss in loss_dict: + if isinstance(loss, dict): + for key, value in loss.items(): + # Ensure the loss component is a scalar tensor + if value.ndim == 0: + total_loss += value + else: + print(f"Skipping non-scalar loss: {key}, shape: {value.shape}") + + # Post-process the outputs to ensure masks are in the correct format + for output in outputs: + if "masks" in output: + output["masks"] = (output["masks"] > 0.5).squeeze(1).to(torch.uint8) + + # Sum the losses + self.log('val_loss', total_loss, batch_size=batch_size) + + metrics = self.val_metrics(outputs, targets) + # Log only scalar values from metrics + scalar_metrics = {} + for key, value in metrics.items(): + if isinstance(value, torch.Tensor) and value.numel() > 1: + # Cast to float if integer and compute mean + value = value.to(torch.float32).mean() + scalar_metrics[key] = value + + self.log_dict(scalar_metrics, batch_size=batch_size) # check if ( @@ -162,21 +200,61 @@ def validation_step(self, batch: Any, batch_idx: int) -> None: f'image/{batch_idx}', fig, global_step=self.global_step ) plt.close() - def test_step(self, batch: Any, batch_idx: int) -> None: """Compute the test loss and additional metrics.""" images, targets = batch['image'], batch['target'] + batch_size = images.shape[0] + outputs = self.model(images) - self.metrics.update(outputs, targets) - self.test_outputs.append((outputs, targets)) - metrics_dict = self.metrics.compute() - self.log_dict(metrics_dict) + print('\nDEBUG THE PREDICTIONS\n') + print(f"Predictions for batch {batch_idx}: {outputs}") + print(f"Ground truth for batch {batch_idx}: {targets}") + + for target in targets: + target["masks"] = target["masks"].to(torch.uint8) + target["boxes"] = target["boxes"].to(torch.float32) + 
target["labels"] = target["labels"].to(torch.int64) + + for output in outputs: + output["masks"] = (output["masks"] > 0.5).squeeze(1).to(torch.uint8) + + loss_dict = self.model(images, targets) # Compute all losses + + # Post-process `loss_dict` to compute total loss + total_loss = 0.0 + for loss in loss_dict: + if isinstance(loss, dict): + for key, value in loss.items(): + # Ensure the loss component is a scalar tensor + if value.ndim == 0: + total_loss += value + else: + print(f"Skipping non-scalar loss: {key}, shape: {value.shape}") + + + # Sum the losses + self.log('test_loss', total_loss, batch_size=batch_size) + metrics = self.val_metrics(outputs, targets) + # Log only scalar values from metrics + scalar_metrics = {} + for key, value in metrics.items(): + if isinstance(value, torch.Tensor) and value.numel() > 1: + # Cast to float if integer and compute mean + value = value.to(torch.float32).mean() + scalar_metrics[key] = value - def predict_step(self, batch: Any, batch_idx: int) -> Tensor: + self.log_dict(scalar_metrics, batch_size=batch_size) + + print('\nDEBUG CAL METRICS\n') + print(f"Validation metrics: {metrics}") + + return outputs + + def predict_step(self, batch: Any, batch_idx: int) -> Any: """Perform inference on a batch of images. Args: @@ -185,7 +263,15 @@ def predict_step(self, batch: Any, batch_idx: int) -> Tensor: Returns: Predicted masks and bounding boxes for the batch. """ + self.model.eval() images = batch['image'] - y_hat: Tensor = self.model(images) - return y_hat + outputs = self.model(images) + return outputs + + + + + + + From 619760b87c2fee12e0f1ccebef33590e9b3786c8 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Mon, 27 Jan 2025 14:28:14 +0100 Subject: [PATCH 10/23] Add files via upload --- test_instancesegmentation.py | 123 +++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 test_instancesegmentation.py diff --git a/test_instancesegmentation.py b/test_instancesegmentation.py new file mode 100644 index 00000000000..784a48201f1 --- /dev/null +++ b/test_instancesegmentation.py @@ -0,0 +1,123 @@ +import torch +import lightning.pytorch as pl +from lightning.pytorch import LightningModule +from torch.utils.data import DataLoader +from torchgeo.datasets import VHR10 +from torchgeo.trainers import InstanceSegmentationTask +import torch.nn.functional as F +from pycocotools import mask as coco_mask +from torch.utils.data import Subset +import matplotlib.pyplot as plt +import torchvision.transforms.functional as TF +from PIL import Image, ImageDraw, ImageOps +import numpy as np +from matplotlib.patches import Rectangle +from torchvision.transforms.functional import to_pil_image + +# Custom collate function for DataLoader (required for Mask R-CNN models) +def collate_fn(batch): + max_height = max(sample['image'].shape[1] for sample in batch) + max_width = max(sample['image'].shape[2] for sample in batch) + + images = torch.stack([ + F.pad(sample['image'], (0, max_width - sample['image'].shape[2], 0, max_height - sample['image'].shape[1])) + for sample in batch + ]) + + targets = [ + { + "labels": sample["labels"].to(torch.int64), + "boxes": sample["boxes"].to(torch.float32), + "masks": F.pad( + sample["masks"], + (0, max_width - sample["masks"].shape[2], 0, max_height - sample["masks"].shape[1]), + ).to(torch.uint8), + } + for sample in batch + ] + + return {"image": images, "target": targets} + +# Visualization function +def visualize_predictions(image, predictions, targets): + """Visualize model predictions and ground truth.""" 
+ image = to_pil_image(image) + + fig, ax = plt.subplots(1, 1, figsize=(10, 10)) + ax.imshow(image) + + # Plot predictions + for box, label in zip(predictions['boxes'], predictions['labels']): + x1, y1, x2, y2 = box + rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none') + ax.add_patch(rect) + ax.text(x1, y1, str(label.item()), color='red', fontsize=12) + + # Plot ground truth + for box, label in zip(targets['boxes'], targets['labels']): + x1, y1, x2, y2 = box + rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='blue', facecolor='none') + ax.add_patch(rect) + ax.text(x1, y1, str(label.item()), color='blue', fontsize=12) + + plt.show() + +# Initialize the VHR10 dataset +train_dataset = VHR10(root="data", split="positive", transforms=None, download=True) +val_dataset = VHR10(root="data", split="positive", transforms=None) + +# Select a small subset of the dataset +N = 100 # Number of samples to use +train_subset = Subset(train_dataset, list(range(N))) +val_subset = Subset(val_dataset, list(range(N))) + +if __name__ == '__main__': + import multiprocessing + multiprocessing.set_start_method('spawn', force=True) + + train_loader = DataLoader(train_subset, batch_size=1, shuffle=True, num_workers=1, collate_fn=collate_fn, persistent_workers=True) + val_loader = DataLoader(val_subset, batch_size=1, shuffle=False, num_workers=1, collate_fn=collate_fn, persistent_workers=True) + + print('\nDEBUG TRAIN LOADER\n') + for batch in train_loader: + print(f"Image shape: {batch['image'].shape}") + print(f"Target: {batch['target']}") + break + + for batch in train_loader: + print(batch) + break + + trainer = pl.Trainer( + max_epochs=10, + accelerator="gpu" if torch.cuda.is_available() else "cpu", + devices=1 + ) + + task = InstanceSegmentationTask( + model="mask_rcnn", + backbone="resnet50", + weights=True, + num_classes=11, + lr=1e-3, + freeze_backbone=False + ) + + print('\nTRAIN THE MODEL\n') + + trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader) + + print('\nEVALUATE THE MODEL\n') + + trainer.test(task, dataloaders=val_loader) + + print('\nINFERENCE AND VISUALIZATION\n') + + test_sample = train_dataset[0] + test_image = test_sample["image"].unsqueeze(0) # Add batch dimension + predictions = task.predict_step({"image": test_image}, batch_idx=0) + + visualize_predictions(test_image, predictions[0], test_sample) + + + From d9158a0f24fb964fe0048f56719daa839a9b1f0a Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Mon, 27 Jan 2025 20:59:39 +0100 Subject: [PATCH 11/23] Update test_instancesegmentation.py --- tests/trainers/test_instancesegmentation.py | 297 +++++++++++++++++--- 1 file changed, 252 insertions(+), 45 deletions(-) diff --git a/tests/trainers/test_instancesegmentation.py b/tests/trainers/test_instancesegmentation.py index d984c970e37..99d6a118ac4 100644 --- a/tests/trainers/test_instancesegmentation.py +++ b/tests/trainers/test_instancesegmentation.py @@ -1,50 +1,257 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
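+
+"""Tests for InstanceSegmentationTask.
+
+Adapted from the SemanticSegmentationTask test suite; some cases assume
+parameters (e.g. ``in_channels``, ``loss``) that the task does not expose yet.
+"""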
+ +import os +from pathlib import Path +from typing import Any, cast + +import pytest +import segmentation_models_pytorch as smp +import timm import torch -import pytorch_lightning as pl -from pytorch_lightning import LightningModule -from torch.utils.data import DataLoader -from torchgeo.datasets import VHR10 -from torchgeo.main import main +import torch.nn as nn +from lightning.pytorch import Trainer +from pytest import MonkeyPatch +from torch.nn.modules import Module +from torchvision.models._api import WeightsEnum +from torchgeo.datamodules import MisconfigurationException, SEN12MSDataModule +from torchgeo.datasets import LandCoverAI, RGBBandsMissingError +from torchgeo.main import main +from torchgeo.models import ResNet18_Weights from torchgeo.trainers import InstanceSegmentationTask -# Custom collate function for DataLoader (required for Mask R-CNN models) -def collate_fn(batch): - return tuple(zip(*batch)) - -# Initialize the VHR10 dataset -train_dataset = VHR10(root="data", split="positive", transforms=None, download=True) -val_dataset = VHR10(root="data", split="positive", transforms=None) - -# Create DataLoaders -train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn) -val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn) - -# Initialize the InstanceSegmentationTask -task = InstanceSegmentationTask( - model="mask_rcnn", - backbone="resnet50", - weights=True, - num_classes=11, - lr=1e-3, - freeze_backbone=False -) - -# Set up PyTorch Lightning Trainer -trainer = pl.Trainer( - max_epochs=10, - accelerator="gpu" if torch.cuda.is_available() else "cpu", - devices=1 -) - -# Train the model -trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader) - -# Evaluate the model -trainer.test(task, dataloaders=val_loader) - -# Example inference -test_sample = train_dataset[0] -test_image = test_sample["image"].unsqueeze(0) # Add batch dimension -predictions = task.predict_step({"image": test_image}, batch_idx=0) -print(predictions) +class SegmentationTestModel(Module): + def __init__(self, in_channels: int = 3, classes: int = 3, **kwargs: Any) -> None: + super().__init__() + self.conv1 = nn.Conv2d( + in_channels=in_channels, out_channels=classes, kernel_size=1, padding=0 + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return cast(torch.Tensor, self.conv1(x)) + + +def create_model(**kwargs: Any) -> Module: + return SegmentationTestModel(**kwargs) + + +def plot(*args: Any, **kwargs: Any) -> None: + return None + + +def plot_missing_bands(*args: Any, **kwargs: Any) -> None: + raise RGBBandsMissingError() + + +class TestSemanticSegmentationTask: + @pytest.mark.parametrize( + 'name', + [ + 'agrifieldnet', + 'cabuar', + 'chabud', + 'chesapeake_cvpr_5', + 'chesapeake_cvpr_7', + 'deepglobelandcover', + 'etci2021', + 'ftw', + 'geonrw', + 'gid15', + 'inria', + 'l7irish', + 'l8biome', + 'landcoverai', + 'landcoverai100', + 'loveda', + 'naipchesapeake', + 'potsdam2d', + 'sen12ms_all', + 'sen12ms_s1', + 'sen12ms_s2_all', + 'sen12ms_s2_reduced', + 'sentinel2_cdl', + 'sentinel2_eurocrops', + 'sentinel2_nccm', + 'sentinel2_south_america_soybean', + 'southafricacroptype', + 'spacenet1', + 'spacenet6', + 'ssl4eo_l_benchmark_cdl', + 'ssl4eo_l_benchmark_nlcd', + 'vaihingen2d', + ], + ) + def test_trainer( + self, monkeypatch: MonkeyPatch, name: str, fast_dev_run: bool + ) -> None: + match name: + case 'chabud' | 'cabuar': + pytest.importorskip('h5py', minversion='3.6') + case 'ftw': + 
pytest.importorskip('pyarrow') + case 'landcoverai': + sha256 = ( + 'ecec8e871faf1bbd8ca525ca95ddc1c1f5213f40afb94599884bd85f990ebd6b' + ) + monkeypatch.setattr(LandCoverAI, 'sha256', sha256) + + config = os.path.join('tests', 'conf', name + '.yaml') + + monkeypatch.setattr(smp, 'Unet', create_model) + monkeypatch.setattr(smp, 'DeepLabV3Plus', create_model) + + args = [ + '--config', + config, + '--trainer.accelerator', + 'cpu', + '--trainer.fast_dev_run', + str(fast_dev_run), + '--trainer.max_epochs', + '1', + '--trainer.log_every_n_steps', + '1', + ] + + main(['fit', *args]) + try: + main(['test', *args]) + except MisconfigurationException: + pass + try: + main(['predict', *args]) + except MisconfigurationException: + pass + + @pytest.fixture + def weights(self) -> WeightsEnum: + return ResNet18_Weights.SENTINEL2_ALL_MOCO + + @pytest.fixture + def mocked_weights( + self, + tmp_path: Path, + monkeypatch: MonkeyPatch, + weights: WeightsEnum, + load_state_dict_from_url: None, + ) -> WeightsEnum: + path = tmp_path / f'{weights}.pth' + model = timm.create_model( + weights.meta['model'], in_chans=weights.meta['in_chans'] + ) + torch.save(model.state_dict(), path) + try: + monkeypatch.setattr(weights.value, 'url', str(path)) + except AttributeError: + monkeypatch.setattr(weights, 'url', str(path)) + return weights + + def test_weight_file(self, checkpoint: str) -> None: + InstanceSegmentationTask(backbone='resnet18', weights=checkpoint, num_classes=6) + + def test_weight_enum(self, mocked_weights: WeightsEnum) -> None: + InstanceSegmentationTask( + backbone=mocked_weights.meta['model'], + weights=mocked_weights, + in_channels=mocked_weights.meta['in_chans'], + ) + + def test_weight_str(self, mocked_weights: WeightsEnum) -> None: + InstanceSegmentationTask( + backbone=mocked_weights.meta['model'], + weights=str(mocked_weights), + in_channels=mocked_weights.meta['in_chans'], + ) + + @pytest.mark.slow + def test_weight_enum_download(self, weights: WeightsEnum) -> None: + InstanceSegmentationTask( + backbone=weights.meta['model'], + weights=weights, + in_channels=weights.meta['in_chans'], + ) + + @pytest.mark.slow + def test_weight_str_download(self, weights: WeightsEnum) -> None: + InstanceSegmentationTask( + backbone=weights.meta['model'], + weights=str(weights), + in_channels=weights.meta['in_chans'], + ) + + def test_invalid_model(self) -> None: + match = "Model type 'invalid_model' is not valid." + with pytest.raises(ValueError, match=match): + InstanceSegmentationTask(model='invalid_model') + + def test_invalid_loss(self) -> None: + match = "Loss type 'invalid_loss' is not valid." 
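+        # NOTE: InstanceSegmentationTask does not currently expose a ``loss``
+        # argument, so this test assumes a future ``loss`` parameter mirroring
+        # SemanticSegmentationTask's.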
+ with pytest.raises(ValueError, match=match): + InstanceSegmentationTask(loss='invalid_loss') + + def test_no_plot_method(self, monkeypatch: MonkeyPatch, fast_dev_run: bool) -> None: + monkeypatch.setattr(SEN12MSDataModule, 'plot', plot) + datamodule = SEN12MSDataModule( + root='tests/data/sen12ms', batch_size=1, num_workers=0 + ) + model = InstanceSegmentationTask( + backbone='resnet18', in_channels=15, num_classes=6 + ) + trainer = Trainer( + accelerator='cpu', + fast_dev_run=fast_dev_run, + log_every_n_steps=1, + max_epochs=1, + ) + trainer.validate(model=model, datamodule=datamodule) + + def test_no_rgb(self, monkeypatch: MonkeyPatch, fast_dev_run: bool) -> None: + monkeypatch.setattr(SEN12MSDataModule, 'plot', plot_missing_bands) + datamodule = SEN12MSDataModule( + root='tests/data/sen12ms', batch_size=1, num_workers=0 + ) + model = InstanceSegmentationTask( + backbone='resnet18', in_channels=15, num_classes=6 + ) + trainer = Trainer( + accelerator='cpu', + fast_dev_run=fast_dev_run, + log_every_n_steps=1, + max_epochs=1, + ) + trainer.validate(model=model, datamodule=datamodule) + + @pytest.mark.parametrize('model_name', ['unet', 'deeplabv3+']) + @pytest.mark.parametrize( + 'backbone', ['resnet18', 'mobilenet_v2', 'efficientnet-b0'] + ) + def test_freeze_backbone(self, model_name: str, backbone: str) -> None: + model = InstanceSegmentationTask( + model=model_name, backbone=backbone, freeze_backbone=True + ) + assert all( + [param.requires_grad is False for param in model.model.encoder.parameters()] + ) + assert all([param.requires_grad for param in model.model.decoder.parameters()]) + assert all( + [ + param.requires_grad + for param in model.model.segmentation_head.parameters() + ] + ) + + # @pytest.mark.parametrize('model_name', ['unet', 'deeplabv3+']) + # def test_freeze_decoder(self, model_name: str) -> None: + # model = InstanceSegmentationTask(model=model_name, freeze_decoder=True) + # assert all( + # [param.requires_grad is False for param in model.model.decoder.parameters()] + # ) + # assert all([param.requires_grad for param in model.model.encoder.parameters()]) + # assert all( + # [ + # param.requires_grad + # for param in model.model.segmentation_head.parameters() + # ] + # ) From 9f48f5064e0a862f20317e061a7d306632a95107 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Tue, 28 Jan 2025 10:28:10 +0100 Subject: [PATCH 12/23] Update and rename test_instancesegmentation.py to test_trainer_instancesegmentation.py --- test_instancesegmentation.py | 123 --------------------------- test_trainer_instancesegmentation.py | 123 +++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 123 deletions(-) delete mode 100644 test_instancesegmentation.py create mode 100644 test_trainer_instancesegmentation.py diff --git a/test_instancesegmentation.py b/test_instancesegmentation.py deleted file mode 100644 index 784a48201f1..00000000000 --- a/test_instancesegmentation.py +++ /dev/null @@ -1,123 +0,0 @@ -import torch -import lightning.pytorch as pl -from lightning.pytorch import LightningModule -from torch.utils.data import DataLoader -from torchgeo.datasets import VHR10 -from torchgeo.trainers import InstanceSegmentationTask -import torch.nn.functional as F -from pycocotools import mask as coco_mask -from torch.utils.data import Subset -import matplotlib.pyplot as plt -import torchvision.transforms.functional as TF -from PIL import Image, ImageDraw, ImageOps -import numpy as np -from matplotlib.patches import Rectangle -from torchvision.transforms.functional import to_pil_image - -# 
Custom collate function for DataLoader (required for Mask R-CNN models) -def collate_fn(batch): - max_height = max(sample['image'].shape[1] for sample in batch) - max_width = max(sample['image'].shape[2] for sample in batch) - - images = torch.stack([ - F.pad(sample['image'], (0, max_width - sample['image'].shape[2], 0, max_height - sample['image'].shape[1])) - for sample in batch - ]) - - targets = [ - { - "labels": sample["labels"].to(torch.int64), - "boxes": sample["boxes"].to(torch.float32), - "masks": F.pad( - sample["masks"], - (0, max_width - sample["masks"].shape[2], 0, max_height - sample["masks"].shape[1]), - ).to(torch.uint8), - } - for sample in batch - ] - - return {"image": images, "target": targets} - -# Visualization function -def visualize_predictions(image, predictions, targets): - """Visualize model predictions and ground truth.""" - image = to_pil_image(image) - - fig, ax = plt.subplots(1, 1, figsize=(10, 10)) - ax.imshow(image) - - # Plot predictions - for box, label in zip(predictions['boxes'], predictions['labels']): - x1, y1, x2, y2 = box - rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none') - ax.add_patch(rect) - ax.text(x1, y1, str(label.item()), color='red', fontsize=12) - - # Plot ground truth - for box, label in zip(targets['boxes'], targets['labels']): - x1, y1, x2, y2 = box - rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='blue', facecolor='none') - ax.add_patch(rect) - ax.text(x1, y1, str(label.item()), color='blue', fontsize=12) - - plt.show() - -# Initialize the VHR10 dataset -train_dataset = VHR10(root="data", split="positive", transforms=None, download=True) -val_dataset = VHR10(root="data", split="positive", transforms=None) - -# Select a small subset of the dataset -N = 100 # Number of samples to use -train_subset = Subset(train_dataset, list(range(N))) -val_subset = Subset(val_dataset, list(range(N))) - -if __name__ == '__main__': - import multiprocessing - multiprocessing.set_start_method('spawn', force=True) - - train_loader = DataLoader(train_subset, batch_size=1, shuffle=True, num_workers=1, collate_fn=collate_fn, persistent_workers=True) - val_loader = DataLoader(val_subset, batch_size=1, shuffle=False, num_workers=1, collate_fn=collate_fn, persistent_workers=True) - - print('\nDEBUG TRAIN LOADER\n') - for batch in train_loader: - print(f"Image shape: {batch['image'].shape}") - print(f"Target: {batch['target']}") - break - - for batch in train_loader: - print(batch) - break - - trainer = pl.Trainer( - max_epochs=10, - accelerator="gpu" if torch.cuda.is_available() else "cpu", - devices=1 - ) - - task = InstanceSegmentationTask( - model="mask_rcnn", - backbone="resnet50", - weights=True, - num_classes=11, - lr=1e-3, - freeze_backbone=False - ) - - print('\nTRAIN THE MODEL\n') - - trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader) - - print('\nEVALUATE THE MODEL\n') - - trainer.test(task, dataloaders=val_loader) - - print('\nINFERENCE AND VISUALIZATION\n') - - test_sample = train_dataset[0] - test_image = test_sample["image"].unsqueeze(0) # Add batch dimension - predictions = task.predict_step({"image": test_image}, batch_idx=0) - - visualize_predictions(test_image, predictions[0], test_sample) - - - diff --git a/test_trainer_instancesegmentation.py b/test_trainer_instancesegmentation.py new file mode 100644 index 00000000000..2073b3e34c4 --- /dev/null +++ b/test_trainer_instancesegmentation.py @@ -0,0 +1,123 @@ +import torch +import lightning.pytorch as pl 
+from torch.utils.data import DataLoader, Subset +from torchgeo.datasets import VHR10 +from torchvision.transforms.functional import to_pil_image +from matplotlib.patches import Rectangle +import matplotlib.pyplot as plt +import torch.nn.functional as F +from torchgeo.trainers import InstanceSegmentationTask + +def collate_fn(batch): + """Custom collate function for DataLoader.""" + max_height = max(sample['image'].shape[1] for sample in batch) + max_width = max(sample['image'].shape[2] for sample in batch) + + images = torch.stack([ + F.pad(sample['image'], (0, max_width - sample['image'].shape[2], 0, max_height - sample['image'].shape[1])) + for sample in batch + ]) + + targets = [ + { + "labels": sample["labels"].to(torch.int64), + "boxes": sample["boxes"].to(torch.float32), + "masks": F.pad( + sample["masks"], + (0, max_width - sample["masks"].shape[2], 0, max_height - sample["masks"].shape[1]), + ).to(torch.uint8), + } + for sample in batch + ] + + return {"image": images, "target": targets} + +def visualize_predictions(image, predictions, targets): + """Visualize predictions and ground truth.""" + image = to_pil_image(image) + + fig, ax = plt.subplots(1, 1, figsize=(10, 10)) + ax.imshow(image) + + # Predictions + for box, label in zip(predictions['boxes'], predictions['labels']): + x1, y1, x2, y2 = box + rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none') + ax.add_patch(rect) + ax.text(x1, y1, f"Pred: {label.item()}", color='red', fontsize=12) + + # Ground truth + for box, label in zip(targets['boxes'], targets['labels']): + x1, y1, x2, y2 = box + rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='blue', facecolor='none') + ax.add_patch(rect) + ax.text(x1, y1, f"GT: {label.item()}", color='blue', fontsize=12) + + plt.show() + +def plot_losses(train_losses, val_losses): + """Plot training and validation losses over epochs.""" + plt.figure(figsize=(10, 5)) + plt.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss', marker='o') + plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss', marker='s') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.title('Training and Validation Loss Over Epochs') + plt.legend() + plt.grid() + plt.show() + +# Initialize VHR-10 dataset +train_dataset = VHR10(root="data", split="positive", transforms=None, download=True) +val_dataset = VHR10(root="data", split="positive", transforms=None) + +# Subset for quick experimentation (adjust N as needed) +N = 100 +train_subset = Subset(train_dataset, list(range(N))) +val_subset = Subset(val_dataset, list(range(N))) + + +if __name__ == '__main__': + import multiprocessing + multiprocessing.set_start_method('spawn', force=True) + + train_loader = DataLoader(train_subset, batch_size=8, shuffle=True, num_workers=1, collate_fn=collate_fn) + val_loader = DataLoader(val_subset, batch_size=8, shuffle=False, num_workers=1, collate_fn=collate_fn) + + # Trainer setup + trainer = pl.Trainer( + max_epochs=5, + accelerator="gpu" if torch.cuda.is_available() else "cpu", + devices=1 + ) + + task = InstanceSegmentationTask( + model="mask_rcnn", + backbone="resnet50", + weights="imagenet", # Pretrained on ImageNet + num_classes=11, # VHR-10 has 10 classes + 1 background + lr=1e-3, + freeze_backbone=False + ) + + print('\nSTART TRAINING\n') + # trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader) + train_losses, val_losses = [], [] + for epoch in range(5): + trainer.fit(task, train_dataloaders=train_loader, 
val_dataloaders=val_loader) + train_loss = task.trainer.callback_metrics.get("train_loss") + val_loss = task.trainer.callback_metrics.get("val_loss") + if train_loss is not None: + train_losses.append(train_loss.item()) + if val_loss is not None: + val_losses.append(val_loss.item()) + + plot_losses(train_losses, val_losses) + + #trainer.test(task, dataloaders=val_loader) + + # Inference and Visualization + sample = train_dataset[1] + image = sample['image'].unsqueeze(0) + predictions = task.predict_step({"image": image}, batch_idx=0) + visualize_predictions(image[0], predictions[0], sample) From 63aefc81712e7ece1f471584cb73de04cee213fe Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Tue, 28 Jan 2025 10:29:49 +0100 Subject: [PATCH 13/23] Update instance_segmentation.py --- torchgeo/trainers/instance_segmentation.py | 60 ++++++---------------- 1 file changed, 17 insertions(+), 43 deletions(-) diff --git a/torchgeo/trainers/instance_segmentation.py b/torchgeo/trainers/instance_segmentation.py index 3be15720fd5..c705bb912aa 100644 --- a/torchgeo/trainers/instance_segmentation.py +++ b/torchgeo/trainers/instance_segmentation.py @@ -12,7 +12,6 @@ from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights from torchvision.models.detection.faster_rcnn import FastRCNNPredictor from .base import BaseTask - import matplotlib.pyplot as plt from matplotlib.figure import Figure from ..datasets import RGBBandsMissingError, unbind_samples @@ -113,7 +112,12 @@ def training_step(self, batch: Any, batch_idx: int) -> Tensor: """ images, targets = batch['image'], batch['target'] loss_dict = self.model(images, targets) - loss = sum(loss for loss in loss_dict.values()) + loss = sum(loss for loss in loss_dict.values()) + + print('\nTRAINING LOSS\n') + print(loss_dict, '\n\n') + print(loss) + self.log('train_loss', loss, batch_size=len(images)) return loss @@ -130,29 +134,14 @@ def validation_step(self, batch: Any, batch_idx: int) -> None: batch_size = images.shape[0] outputs = self.model(images) + loss_dict = self.model(images, targets) # list of dictionaries + total_loss = sum(loss_item for loss_dict in loss_dict for loss_item in loss_dict.values() if loss_item.ndim == 0) for target in targets: target["masks"] = (target["masks"] > 0).to(torch.uint8) target["boxes"] = target["boxes"].to(torch.float32) target["labels"] = target["labels"].to(torch.int64) - # Compute the loss and predictions - loss_dict = self.model(images, targets) # list of dictionaries - - print('\nDEBUG TRAINING LOSS\n') - print(f"Training loss: {loss_dict}") - - # Post-process `loss_dict` to compute total loss - total_loss = 0.0 - for loss in loss_dict: - if isinstance(loss, dict): - for key, value in loss.items(): - # Ensure the loss component is a scalar tensor - if value.ndim == 0: - total_loss += value - else: - print(f"Skipping non-scalar loss: {key}, shape: {value.shape}") - # Post-process the outputs to ensure masks are in the correct format for output in outputs: if "masks" in output: @@ -170,7 +159,7 @@ def validation_step(self, batch: Any, batch_idx: int) -> None: value = value.to(torch.float32).mean() scalar_metrics[key] = value - self.log_dict(scalar_metrics, batch_size=batch_size) + self.log_dict(scalar_metrics, batch_size=batch_size) # check if ( @@ -208,10 +197,8 @@ def test_step(self, batch: Any, batch_idx: int) -> None: batch_size = images.shape[0] outputs = self.model(images) - - print('\nDEBUG THE PREDICTIONS\n') - print(f"Predictions for batch {batch_idx}: {outputs}") - print(f"Ground 
truth for batch {batch_idx}: {targets}") + loss_dict = self.model(images, targets) # Compute all losses + total_loss = sum(loss_item for loss_dict in loss_dict for loss_item in loss_dict.values() if loss_item.ndim == 0) for target in targets: target["masks"] = target["masks"].to(torch.uint8) @@ -221,21 +208,6 @@ def test_step(self, batch: Any, batch_idx: int) -> None: for output in outputs: output["masks"] = (output["masks"] > 0.5).squeeze(1).to(torch.uint8) - loss_dict = self.model(images, targets) # Compute all losses - - # Post-process `loss_dict` to compute total loss - total_loss = 0.0 - for loss in loss_dict: - if isinstance(loss, dict): - for key, value in loss.items(): - # Ensure the loss component is a scalar tensor - if value.ndim == 0: - total_loss += value - else: - print(f"Skipping non-scalar loss: {key}, shape: {value.shape}") - - - # Sum the losses self.log('test_loss', total_loss, batch_size=batch_size) metrics = self.val_metrics(outputs, targets) @@ -249,10 +221,9 @@ def test_step(self, batch: Any, batch_idx: int) -> None: self.log_dict(scalar_metrics, batch_size=batch_size) - print('\nDEBUG CAL METRICS\n') - print(f"Validation metrics: {metrics}") - - return outputs + print('\nTESTING LOSS\n') + print(loss_dict, '\n\n') + print(total_loss) def predict_step(self, batch: Any, batch_idx: int) -> Any: """Perform inference on a batch of images. @@ -266,6 +237,9 @@ def predict_step(self, batch: Any, batch_idx: int) -> Any: self.model.eval() images = batch['image'] outputs = self.model(images) + + for output in outputs: + output["masks"] = (output["masks"] > 0.5).to(torch.uint8) return outputs From 70074e7b9f94d1126b2efcd9e2b43f3ee4c746d2 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Tue, 28 Jan 2025 19:12:53 +0100 Subject: [PATCH 14/23] Add files via upload --- test_trainer.ipynb | 150 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 test_trainer.ipynb diff --git a/test_trainer.ipynb b/test_trainer.ipynb new file mode 100644 index 00000000000..3f5a0b99c18 --- /dev/null +++ b/test_trainer.ipynb @@ -0,0 +1,150 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import lightning.pytorch as pl\n", + "from torch.utils.data import DataLoader, Subset\n", + "from torchgeo.datasets import VHR10\n", + "from torchvision.transforms.functional import to_pil_image\n", + "from matplotlib.patches import Rectangle\n", + "import matplotlib.pyplot as plt\n", + "import torch.nn.functional as F\n", + "from torchgeo.trainers import InstanceSegmentationTask \n", + "import matplotlib.patches as patches\n", + "import numpy as np\n", + "\n", + "def collate_fn(batch):\n", + " \"\"\"Custom collate function for DataLoader.\"\"\"\n", + " max_height = max(sample['image'].shape[1] for sample in batch)\n", + " max_width = max(sample['image'].shape[2] for sample in batch)\n", + "\n", + " images = torch.stack([\n", + " F.pad(sample['image'], (0, max_width - sample['image'].shape[2], 0, max_height - sample['image'].shape[1]))\n", + " for sample in batch\n", + " ])\n", + "\n", + " targets = [\n", + " {\n", + " \"labels\": sample[\"labels\"].to(torch.int64),\n", + " \"boxes\": sample[\"boxes\"].to(torch.float32),\n", + " \"masks\": F.pad(\n", + " sample[\"masks\"],\n", + " (0, max_width - sample[\"masks\"].shape[2], 0, max_height - sample[\"masks\"].shape[1]),\n", + " ).to(torch.uint8),\n", + " }\n", + " for sample in batch\n", + " ]\n", + "\n", + " return {\"image\": 
images, \"target\": targets}\n", + "\n", + "def visualize_predictions(image, predictions, targets):\n", + " \"\"\"Visualize predictions and ground truth.\"\"\"\n", + " image = to_pil_image(image)\n", + "\n", + " fig, ax = plt.subplots(1, 1, figsize=(10, 10))\n", + " ax.imshow(image)\n", + "\n", + " # Predictions\n", + " for box, label in zip(predictions['boxes'], predictions['labels']):\n", + " x1, y1, x2, y2 = box\n", + " rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none')\n", + " ax.add_patch(rect)\n", + " ax.text(x1, y1, f\"Pred: {label.item()}\", color='red', fontsize=12)\n", + "\n", + " # Ground truth\n", + " for box, label in zip(targets['boxes'], targets['labels']):\n", + " x1, y1, x2, y2 = box\n", + " rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='blue', facecolor='none')\n", + " ax.add_patch(rect)\n", + " ax.text(x1, y1, f\"GT: {label.item()}\", color='blue', fontsize=12)\n", + "\n", + " plt.show()\n", + "\n", + "def plot_losses(train_losses, val_losses):\n", + " \"\"\"Plot training and validation losses over epochs.\"\"\"\n", + " plt.figure(figsize=(10, 5))\n", + " plt.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss', marker='o')\n", + " plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss', marker='s')\n", + " plt.xlabel('Epochs')\n", + " plt.ylabel('Loss')\n", + " plt.title('Training and Validation Loss Over Epochs')\n", + " plt.legend()\n", + " plt.grid()\n", + " plt.show()\n", + "\n", + "# Initialize VHR-10 dataset\n", + "train_dataset = VHR10(root=\"data\", split=\"positive\", transforms=None, download=True)\n", + "val_dataset = VHR10(root=\"data\", split=\"positive\", transforms=None)\n", + "\n", + "# Subset for quick experimentation (adjust N as needed)\n", + "N = 100\n", + "train_subset = Subset(train_dataset, list(range(N)))\n", + "val_subset = Subset(val_dataset, list(range(N)))\n", + "\n", + "if __name__ == '__main__':\n", + " import multiprocessing\n", + " multiprocessing.set_start_method('spawn', force=True)\n", + "\n", + " train_loader = DataLoader(train_subset, batch_size=8, shuffle=True, num_workers=1, collate_fn=collate_fn)\n", + " val_loader = DataLoader(val_subset, batch_size=8, shuffle=False, num_workers=1, collate_fn=collate_fn)\n", + "\n", + " # Trainer setup\n", + " trainer = pl.Trainer(\n", + " max_epochs=5, \n", + " accelerator=\"gpu\" if torch.cuda.is_available() else \"cpu\",\n", + " devices=1\n", + " )\n", + "\n", + " task = InstanceSegmentationTask(\n", + " model=\"mask_rcnn\", \n", + " backbone=\"resnet50\", \n", + " weights=\"imagenet\", # Pretrained on ImageNet\n", + " num_classes=11, # VHR-10 has 10 classes + 1 background\n", + " lr=1e-3, \n", + " freeze_backbone=False \n", + " )\n", + "\n", + " print('\\nSTART TRAINING\\n')\n", + " # trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader)\n", + " train_losses, val_losses = [], []\n", + " for epoch in range(5):\n", + " trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader)\n", + " train_loss = task.trainer.callback_metrics.get(\"train_loss\")\n", + " val_loss = task.trainer.callback_metrics.get(\"val_loss\")\n", + " if train_loss is not None:\n", + " train_losses.append(train_loss.item())\n", + " if val_loss is not None:\n", + " val_losses.append(val_loss.item())\n", + " \n", + " plot_losses(train_losses, val_losses)\n", + "\n", + " #trainer.test(task, dataloaders=val_loader)\n", + "\n", + " # Inference and Visualization\n", + " sample = 
train_dataset[1]\n",
+    "    image = sample['image'].unsqueeze(0) \n",
+    "    predictions = task.predict_step({\"image\": image}, batch_idx=0)\n",
+    "    visualize_predictions(image[0], predictions[0], sample)\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.12.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
From b3de0017e224ebab2bbba13754ad9f7d5b709def Mon Sep 17 00:00:00 2001
From: Arianna Sole
Date: Tue, 28 Jan 2025 19:14:35 +0100
Subject: [PATCH 15/23] Created with Colab

---
 test_trainer.ipynb | 332 +++++++++++++++++++++++++--------------------
 1 file changed, 184 insertions(+), 148 deletions(-)

diff --git a/test_trainer.ipynb b/test_trainer.ipynb
index 3f5a0b99c18..34d06eaa744 100644
--- a/test_trainer.ipynb
+++ b/test_trainer.ipynb
@@ -1,150 +1,186 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import lightning.pytorch as pl\n",
-    "from torch.utils.data import DataLoader, Subset\n",
-    "from torchgeo.datasets import VHR10\n",
-    "from torchvision.transforms.functional import to_pil_image\n",
-    "from matplotlib.patches import Rectangle\n",
-    "import matplotlib.pyplot as plt\n",
-    "import torch.nn.functional as F\n",
-    "from torchgeo.trainers import InstanceSegmentationTask \n",
-    "import matplotlib.patches as patches\n",
-    "import numpy as np\n",
-    "\n",
-    "def collate_fn(batch):\n",
-    "    \"\"\"Custom collate function for DataLoader.\"\"\"\n",
-    "    max_height = max(sample['image'].shape[1] for sample in batch)\n",
-    "    max_width = max(sample['image'].shape[2] for sample in batch)\n",
-    "\n",
-    "    images = torch.stack([\n",
-    "        F.pad(sample['image'], (0, max_width - sample['image'].shape[2], 0, max_height - sample['image'].shape[1]))\n",
-    "        for sample in batch\n",
-    "    ])\n",
-    "\n",
-    "    targets = [\n",
-    "        {\n",
-    "            \"labels\": sample[\"labels\"].to(torch.int64),\n",
-    "            \"boxes\": sample[\"boxes\"].to(torch.float32),\n",
-    "            \"masks\": F.pad(\n",
-    "                sample[\"masks\"],\n",
-    "                (0, max_width - sample[\"masks\"].shape[2], 0, max_height - sample[\"masks\"].shape[1]),\n",
-    "            ).to(torch.uint8),\n",
-    "        }\n",
-    "        for sample in batch\n",
-    "    ]\n",
-    "\n",
-    "    return {\"image\": images, \"target\": targets}\n",
-    "\n",
-    "def visualize_predictions(image, predictions, targets):\n",
-    "    \"\"\"Visualize predictions and ground truth.\"\"\"\n",
-    "    image = to_pil_image(image)\n",
-    "\n",
-    "    fig, ax = plt.subplots(1, 1, figsize=(10, 10))\n",
-    "    ax.imshow(image)\n",
-    "\n",
-    "    # Predictions\n",
-    "    for box, label in zip(predictions['boxes'], predictions['labels']):\n",
-    "        x1, y1, x2, y2 = box\n",
-    "        rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none')\n",
-    "        ax.add_patch(rect)\n",
-    "        ax.text(x1, y1, f\"Pred: {label.item()}\", color='red', fontsize=12)\n",
-    "\n",
-    "    # Ground truth\n",
-    "    for box, label in zip(targets['boxes'], targets['labels']):\n",
-    "        x1, y1, x2, y2 = box\n",
-    "        rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='blue', facecolor='none')\n",
-    "        ax.add_patch(rect)\n",
-    "        ax.text(x1, y1, f\"GT: {label.item()}\", color='blue', fontsize=12)\n",
-    "\n",
-    "    plt.show()\n",
-    "\n",
-    "def plot_losses(train_losses, val_losses):\n",
-    "    \"\"\"Plot training and validation losses over epochs.\"\"\"\n",
-    "    plt.figure(figsize=(10, 5))\n",
-    "    plt.plot(range(1, len(train_losses) + 1), 
train_losses, label='Training Loss', marker='o')\n", - " plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss', marker='s')\n", - " plt.xlabel('Epochs')\n", - " plt.ylabel('Loss')\n", - " plt.title('Training and Validation Loss Over Epochs')\n", - " plt.legend()\n", - " plt.grid()\n", - " plt.show()\n", - "\n", - "# Initialize VHR-10 dataset\n", - "train_dataset = VHR10(root=\"data\", split=\"positive\", transforms=None, download=True)\n", - "val_dataset = VHR10(root=\"data\", split=\"positive\", transforms=None)\n", - "\n", - "# Subset for quick experimentation (adjust N as needed)\n", - "N = 100\n", - "train_subset = Subset(train_dataset, list(range(N)))\n", - "val_subset = Subset(val_dataset, list(range(N)))\n", - "\n", - "if __name__ == '__main__':\n", - " import multiprocessing\n", - " multiprocessing.set_start_method('spawn', force=True)\n", - "\n", - " train_loader = DataLoader(train_subset, batch_size=8, shuffle=True, num_workers=1, collate_fn=collate_fn)\n", - " val_loader = DataLoader(val_subset, batch_size=8, shuffle=False, num_workers=1, collate_fn=collate_fn)\n", - "\n", - " # Trainer setup\n", - " trainer = pl.Trainer(\n", - " max_epochs=5, \n", - " accelerator=\"gpu\" if torch.cuda.is_available() else \"cpu\",\n", - " devices=1\n", - " )\n", - "\n", - " task = InstanceSegmentationTask(\n", - " model=\"mask_rcnn\", \n", - " backbone=\"resnet50\", \n", - " weights=\"imagenet\", # Pretrained on ImageNet\n", - " num_classes=11, # VHR-10 has 10 classes + 1 background\n", - " lr=1e-3, \n", - " freeze_backbone=False \n", - " )\n", - "\n", - " print('\\nSTART TRAINING\\n')\n", - " # trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader)\n", - " train_losses, val_losses = [], []\n", - " for epoch in range(5):\n", - " trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader)\n", - " train_loss = task.trainer.callback_metrics.get(\"train_loss\")\n", - " val_loss = task.trainer.callback_metrics.get(\"val_loss\")\n", - " if train_loss is not None:\n", - " train_losses.append(train_loss.item())\n", - " if val_loss is not None:\n", - " val_losses.append(val_loss.item())\n", - " \n", - " plot_losses(train_losses, val_losses)\n", - "\n", - " #trainer.test(task, dataloaders=val_loader)\n", - "\n", - " # Inference and Visualization\n", - " sample = train_dataset[1]\n", - " image = sample['image'].unsqueeze(0) \n", - " predictions = task.predict_step({\"image\": image}, batch_idx=0)\n", - " visualize_predictions(image[0], predictions[0], sample)\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "gQBpL3DTHh2v", + "outputId": "0f96c780-21d7-42fe-fc97-db77155f826c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 383 + } + }, + "outputs": [ + { + "output_type": "error", + "ename": "ModuleNotFoundError", + "evalue": "No module named 'lightning'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mimport\u001b[0m 
\u001b[0mlightning\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpytorch\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpl\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mDataLoader\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSubset\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtorchgeo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatasets\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mVHR10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtorchvision\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransforms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunctional\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mto_pil_image\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'lightning'", + "", + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n" + ], + "errorDetails": { + "actions": [ + { + "action": "open_url", + "actionText": "Open Examples", + "url": "/notebooks/snippets/importing_libraries.ipynb" + } + ] + } + } + ], + "source": [ + "!pip install torch torchvision torchgeo lightning matplotlib\n", + "\n", + "import torch\n", + "import lightning.pytorch as pl\n", + "from torch.utils.data import DataLoader, Subset\n", + "from torchgeo.datasets import VHR10\n", + "from torchvision.transforms.functional import to_pil_image\n", + "from matplotlib.patches import Rectangle\n", + "import matplotlib.pyplot as plt\n", + "import torch.nn.functional as F\n", + "from torchgeo.trainers import InstanceSegmentationTask\n", + "import matplotlib.patches as patches\n", + "import numpy as np\n", + "\n", + "def collate_fn(batch):\n", + " \"\"\"Custom collate function for DataLoader.\"\"\"\n", + " max_height = max(sample['image'].shape[1] for sample in batch)\n", + " max_width = max(sample['image'].shape[2] for sample in batch)\n", + "\n", + " images = torch.stack([\n", + " F.pad(sample['image'], (0, max_width - sample['image'].shape[2], 0, max_height - sample['image'].shape[1]))\n", + " for sample in batch\n", + " ])\n", + "\n", + " targets = [\n", + " {\n", + " \"labels\": sample[\"labels\"].to(torch.int64),\n", + " \"boxes\": sample[\"boxes\"].to(torch.float32),\n", + " \"masks\": F.pad(\n", + " sample[\"masks\"],\n", + " (0, max_width - sample[\"masks\"].shape[2], 0, max_height - sample[\"masks\"].shape[1]),\n", + " ).to(torch.uint8),\n", + " }\n", + " for sample in batch\n", + " ]\n", + "\n", + " return {\"image\": images, \"target\": targets}\n", + "\n", + "def visualize_predictions(image, predictions, targets):\n", + " \"\"\"Visualize predictions and ground truth.\"\"\"\n", + " image = to_pil_image(image)\n", + "\n", + " fig, ax = plt.subplots(1, 1, figsize=(10, 10))\n", + " ax.imshow(image)\n", + "\n", + " # Predictions\n", + " for box, label in zip(predictions['boxes'], predictions['labels']):\n", + " x1, y1, x2, y2 = box\n", + " 
rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none')\n", + " ax.add_patch(rect)\n", + " ax.text(x1, y1, f\"Pred: {label.item()}\", color='red', fontsize=12)\n", + "\n", + " # Ground truth\n", + " for box, label in zip(targets['boxes'], targets['labels']):\n", + " x1, y1, x2, y2 = box\n", + " rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='blue', facecolor='none')\n", + " ax.add_patch(rect)\n", + " ax.text(x1, y1, f\"GT: {label.item()}\", color='blue', fontsize=12)\n", + "\n", + " plt.show()\n", + "\n", + "def plot_losses(train_losses, val_losses):\n", + " \"\"\"Plot training and validation losses over epochs.\"\"\"\n", + " plt.figure(figsize=(10, 5))\n", + " plt.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss', marker='o')\n", + " plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss', marker='s')\n", + " plt.xlabel('Epochs')\n", + " plt.ylabel('Loss')\n", + " plt.title('Training and Validation Loss Over Epochs')\n", + " plt.legend()\n", + " plt.grid()\n", + " plt.show()\n", + "\n", + "# Initialize VHR-10 dataset\n", + "train_dataset = VHR10(root=\"data\", split=\"positive\", transforms=None, download=True)\n", + "val_dataset = VHR10(root=\"data\", split=\"positive\", transforms=None)\n", + "\n", + "# Subset for quick experimentation (adjust N as needed)\n", + "N = 100\n", + "train_subset = Subset(train_dataset, list(range(N)))\n", + "val_subset = Subset(val_dataset, list(range(N)))\n", + "\n", + "if __name__ == '__main__':\n", + " import multiprocessing\n", + " multiprocessing.set_start_method('spawn', force=True)\n", + "\n", + " train_loader = DataLoader(train_subset, batch_size=8, shuffle=True, num_workers=1, collate_fn=collate_fn)\n", + " val_loader = DataLoader(val_subset, batch_size=8, shuffle=False, num_workers=1, collate_fn=collate_fn)\n", + "\n", + " # Trainer setup\n", + " trainer = pl.Trainer(\n", + " max_epochs=5,\n", + " accelerator=\"gpu\" if torch.cuda.is_available() else \"cpu\",\n", + " devices=1\n", + " )\n", + "\n", + " task = InstanceSegmentationTask(\n", + " model=\"mask_rcnn\",\n", + " backbone=\"resnet50\",\n", + " weights=\"imagenet\", # Pretrained on ImageNet\n", + " num_classes=11, # VHR-10 has 10 classes + 1 background\n", + " lr=1e-3,\n", + " freeze_backbone=False\n", + " )\n", + "\n", + " print('\\nSTART TRAINING\\n')\n", + " # trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader)\n", + " train_losses, val_losses = [], []\n", + " for epoch in range(5):\n", + " trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader)\n", + " train_loss = task.trainer.callback_metrics.get(\"train_loss\")\n", + " val_loss = task.trainer.callback_metrics.get(\"val_loss\")\n", + " if train_loss is not None:\n", + " train_losses.append(train_loss.item())\n", + " if val_loss is not None:\n", + " val_losses.append(val_loss.item())\n", + "\n", + " plot_losses(train_losses, val_losses)\n", + "\n", + " #trainer.test(task, dataloaders=val_loader)\n", + "\n", + " # Inference and Visualization\n", + " sample = train_dataset[1]\n", + " image = sample['image'].unsqueeze(0)\n", + " predictions = task.predict_step({\"image\": image}, batch_idx=0)\n", + " visualize_predictions(image[0], predictions[0], sample)\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.0" + }, + "colab": { + "provenance": [], + "gpuType": "T4" + }, + 
"accelerator": "GPU" }, - "language_info": { - "name": "python", - "version": "3.12.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From d70f1e396b955930e78b1a57fe93e4c0280b7469 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Tue, 28 Jan 2025 20:51:28 +0100 Subject: [PATCH 16/23] Creato con Colab --- test_trainer.ipynb | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test_trainer.ipynb b/test_trainer.ipynb index 34d06eaa744..67a503f4eab 100644 --- a/test_trainer.ipynb +++ b/test_trainer.ipynb @@ -2,7 +2,19 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive', force_remount=True)" + ], + "metadata": { + "id": "S0al7K8Fc0Xa" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "id": "gQBpL3DTHh2v", "outputId": "0f96c780-21d7-42fe-fc97-db77155f826c", @@ -50,6 +62,7 @@ "import matplotlib.patches as patches\n", "import numpy as np\n", "\n", + "\n", "def collate_fn(batch):\n", " \"\"\"Custom collate function for DataLoader.\"\"\"\n", " max_height = max(sample['image'].shape[1] for sample in batch)\n", From 1e68d2db5f14cdf3c9352f04f61ef57ea44492bb Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Tue, 28 Jan 2025 21:22:59 +0100 Subject: [PATCH 17/23] Creato con Colab --- test_trainer.ipynb | 268 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 257 insertions(+), 11 deletions(-) diff --git a/test_trainer.ipynb b/test_trainer.ipynb index 67a503f4eab..66fbaf12af3 100644 --- a/test_trainer.ipynb +++ b/test_trainer.ipynb @@ -3,21 +3,269 @@ { "cell_type": "code", "source": [ + "# !rm -rf /root/.config/Google\n", "from google.colab import drive\n", "drive.mount('/content/drive', force_remount=True)" ], "metadata": { - "id": "S0al7K8Fc0Xa" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "S0al7K8Fc0Xa", + "outputId": "33d797ab-ceda-47b9-c7bb-671150cf29b4" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install torch torchvision torchgeo lightning matplotlib" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "pBA2DGC4eOmt", + "outputId": "d54832c7-2ae2-4d3b-c240-09532b81b90b" }, - "execution_count": null, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.5.1+cu121)\n", + "Requirement already satisfied: torchvision in /usr/local/lib/python3.11/dist-packages (0.20.1+cu121)\n", + "Collecting torchgeo\n", + " Downloading torchgeo-0.6.2-py3-none-any.whl.metadata (19 kB)\n", + "Collecting lightning\n", + " Downloading lightning-2.5.0.post0-py3-none-any.whl.metadata (40 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.4/40.4 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (3.10.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch) (3.17.0)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.11/dist-packages (from 
torch) (4.12.2)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.5)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch) (2024.10.0)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n", + "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.8.61)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from torchvision) (1.26.4)\n", + "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.11/dist-packages (from torchvision) (11.1.0)\n", + "Requirement already satisfied: einops>=0.3 in /usr/local/lib/python3.11/dist-packages (from torchgeo) (0.8.0)\n", + "Collecting fiona>=1.8.21 (from torchgeo)\n", + " Downloading fiona-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (56 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.6/56.6 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting kornia>=0.7.3 (from torchgeo)\n", + " Downloading kornia-0.8.0-py2.py3-none-any.whl.metadata (17 kB)\n", + "Collecting lightly!=1.4.26,>=1.4.5 (from torchgeo)\n", + " Downloading lightly-1.5.18-py3-none-any.whl.metadata (36 kB)\n", + "Requirement already satisfied: pandas>=1.3.3 in /usr/local/lib/python3.11/dist-packages (from torchgeo) (2.2.2)\n", + "Requirement already satisfied: pyproj>=3.3 in /usr/local/lib/python3.11/dist-packages (from torchgeo) 
(3.7.0)\n", + "Collecting rasterio!=1.4.0,!=1.4.1,!=1.4.2,>=1.3 (from torchgeo)\n", + " Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)\n", + "Collecting rtree>=1 (from torchgeo)\n", + " Downloading Rtree-1.3.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.1 kB)\n", + "Collecting segmentation-models-pytorch>=0.2 (from torchgeo)\n", + " Downloading segmentation_models_pytorch-0.4.0-py3-none-any.whl.metadata (32 kB)\n", + "Requirement already satisfied: shapely>=1.8 in /usr/local/lib/python3.11/dist-packages (from torchgeo) (2.0.6)\n", + "Requirement already satisfied: timm>=0.4.12 in /usr/local/lib/python3.11/dist-packages (from torchgeo) (1.0.14)\n", + "Collecting torchmetrics>=0.10 (from torchgeo)\n", + " Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)\n", + "Requirement already satisfied: PyYAML<8.0,>=5.4 in /usr/local/lib/python3.11/dist-packages (from lightning) (6.0.2)\n", + "Collecting lightning-utilities<2.0,>=0.10.0 (from lightning)\n", + " Downloading lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)\n", + "Requirement already satisfied: packaging<25.0,>=20.0 in /usr/local/lib/python3.11/dist-packages (from lightning) (24.2)\n", + "Requirement already satisfied: tqdm<6.0,>=4.57.0 in /usr/local/lib/python3.11/dist-packages (from lightning) (4.67.1)\n", + "Collecting pytorch-lightning (from lightning)\n", + " Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl.metadata (21 kB)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.3.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (4.55.6)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.4.8)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (3.2.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (2.8.2)\n", + "Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.11/dist-packages (from fiona>=1.8.21->torchgeo) (24.3.0)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from fiona>=1.8.21->torchgeo) (2024.12.14)\n", + "Requirement already satisfied: click~=8.0 in /usr/local/lib/python3.11/dist-packages (from fiona>=1.8.21->torchgeo) (8.1.8)\n", + "Collecting click-plugins>=1.0 (from fiona>=1.8.21->torchgeo)\n", + " Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)\n", + "Collecting cligj>=0.5 (from fiona>=1.8.21->torchgeo)\n", + " Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)\n", + "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]<2026.0,>=2022.5.0->lightning) (3.11.11)\n", + "Collecting kornia_rs>=0.1.0 (from kornia>=0.7.3->torchgeo)\n", + " Downloading kornia_rs-0.1.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)\n", + "Collecting hydra-core>=1.0.0 (from lightly!=1.4.26,>=1.4.5->torchgeo)\n", + " Downloading hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB)\n", + "Collecting lightly_utils~=0.0.0 (from lightly!=1.4.26,>=1.4.5->torchgeo)\n", + " Downloading 
lightly_utils-0.0.2-py3-none-any.whl.metadata (1.4 kB)\n", + "Requirement already satisfied: requests>=2.23.0 in /usr/local/lib/python3.11/dist-packages (from lightly!=1.4.26,>=1.4.5->torchgeo) (2.32.3)\n", + "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.11/dist-packages (from lightly!=1.4.26,>=1.4.5->torchgeo) (1.17.0)\n", + "Requirement already satisfied: pydantic>=1.10.5 in /usr/local/lib/python3.11/dist-packages (from lightly!=1.4.26,>=1.4.5->torchgeo) (2.10.6)\n", + "Requirement already satisfied: urllib3>=1.25.3 in /usr/local/lib/python3.11/dist-packages (from lightly!=1.4.26,>=1.4.5->torchgeo) (2.3.0)\n", + "Collecting aenum>=3.1.11 (from lightly!=1.4.26,>=1.4.5->torchgeo)\n", + " Downloading aenum-3.1.15-py3-none-any.whl.metadata (3.7 kB)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from lightning-utilities<2.0,>=0.10.0->lightning) (75.1.0)\n", + "Collecting jsonargparse<5.0,>=4.27.7 (from jsonargparse[signatures]<5.0,>=4.27.7; extra == \"pytorch-extra\"->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo)\n", + " Downloading jsonargparse-4.36.0-py3-none-any.whl.metadata (12 kB)\n", + "Collecting omegaconf<3.0,>=2.2.3 (from lightning[pytorch-extra]!=2.3.*,>=2->torchgeo)\n", + " Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)\n", + "Requirement already satisfied: rich<14.0,>=12.3.0 in /usr/local/lib/python3.11/dist-packages (from lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (13.9.4)\n", + "Collecting tensorboardX<3.0,>=2.2 (from lightning[pytorch-extra]!=2.3.*,>=2->torchgeo)\n", + " Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl.metadata (5.8 kB)\n", + "Collecting bitsandbytes<1.0,>=0.44.0 (from lightning[pytorch-extra]!=2.3.*,>=2->torchgeo)\n", + " Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.3.3->torchgeo) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.3.3->torchgeo) (2025.1)\n", + "Collecting affine (from rasterio!=1.4.0,!=1.4.1,!=1.4.2,>=1.3->torchgeo)\n", + " Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)\n", + "Collecting efficientnet-pytorch>=0.6.1 (from segmentation-models-pytorch>=0.2->torchgeo)\n", + " Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: huggingface-hub>=0.24 in /usr/local/lib/python3.11/dist-packages (from segmentation-models-pytorch>=0.2->torchgeo) (0.27.1)\n", + "Collecting pretrainedmodels>=0.7.1 (from segmentation-models-pytorch>=0.2->torchgeo)\n", + " Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.8/58.8 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: safetensors in /usr/local/lib/python3.11/dist-packages (from timm>=0.4.12->torchgeo) (0.5.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (1.3.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (1.18.3)\n", + "Collecting antlr4-python3-runtime==4.9.* (from hydra-core>=1.0.0->lightly!=1.4.26,>=1.4.5->torchgeo)\n", + " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from jsonargparse[signatures]<5.0,>=4.27.7; extra == \"pytorch-extra\"->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (0.16)\n", + "Collecting typeshed-client>=2.1.0 (from jsonargparse[signatures]<5.0,>=4.27.7; extra == \"pytorch-extra\"->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo)\n", + " Downloading typeshed_client-2.7.0-py3-none-any.whl.metadata (7.9 kB)\n", + "Collecting munch (from pretrainedmodels>=0.7.1->segmentation-models-pytorch>=0.2->torchgeo)\n", + " Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=1.10.5->lightly!=1.4.26,>=1.4.5->torchgeo) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=1.10.5->lightly!=1.4.26,>=1.4.5->torchgeo) (2.27.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.23.0->lightly!=1.4.26,>=1.4.5->torchgeo) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.23.0->lightly!=1.4.26,>=1.4.5->torchgeo) (3.10)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14.0,>=12.3.0->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14.0,>=12.3.0->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (2.18.0)\n", + "Requirement already satisfied: protobuf>=3.20 in /usr/local/lib/python3.11/dist-packages (from tensorboardX<3.0,>=2.2->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (4.25.6)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14.0,>=12.3.0->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (0.1.2)\n", + "Requirement already satisfied: importlib-resources>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from typeshed-client>=2.1.0->jsonargparse[signatures]<5.0,>=4.27.7; extra == \"pytorch-extra\"->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (6.5.2)\n", + "Downloading torchgeo-0.6.2-py3-none-any.whl (454 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m26.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading lightning-2.5.0.post0-py3-none-any.whl (815 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m815.2/815.2 kB\u001b[0m \u001b[31m45.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fiona-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m44.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading kornia-0.8.0-py2.py3-none-any.whl (1.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m66.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading lightly-1.5.18-py3-none-any.whl (849 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m849.0/849.0 kB\u001b[0m \u001b[31m58.4 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading lightning_utilities-0.11.9-py3-none-any.whl (28 kB)\n", + "Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl (819 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m819.3/819.3 kB\u001b[0m \u001b[31m55.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m22.2/22.2 MB\u001b[0m \u001b[31m83.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading Rtree-1.3.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (543 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m543.2/543.2 kB\u001b[0m \u001b[31m45.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading segmentation_models_pytorch-0.4.0-py3-none-any.whl (121 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.3/121.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading torchmetrics-1.6.1-py3-none-any.whl (927 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m927.3/927.3 kB\u001b[0m \u001b[31m61.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading aenum-3.1.15-py3-none-any.whl (137 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.6/137.6 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.7/69.7 MB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)\n", + "Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)\n", + "Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.5/154.5 kB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading jsonargparse-4.36.0-py3-none-any.whl (214 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m214.5/214.5 kB\u001b[0m \u001b[31m19.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading kornia_rs-0.1.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m80.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading lightly_utils-0.0.2-py3-none-any.whl (6.4 kB)\n", + "Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.7/101.7 kB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading affine-2.4.0-py3-none-any.whl (15 kB)\n", + "Downloading typeshed_client-2.7.0-py3-none-any.whl (624 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m624.4/624.4 kB\u001b[0m \u001b[31m49.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading munch-4.0.0-py2.py3-none-any.whl (9.9 kB)\n", + "Building wheels for collected packages: efficientnet-pytorch, antlr4-python3-runtime, pretrainedmodels\n", + " Building wheel for efficientnet-pytorch (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16424 sha256=90f30150a6591e8e0de5f7170e32716068df4c4732c5295b8abe4d3e45481de9\n", + " Stored in directory: /root/.cache/pip/wheels/8b/6f/9b/231a832f811ab6ebb1b32455b177ffc6b8b1cd8de19de70c09\n", + " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144555 sha256=307a00d9480659ee147229d4122e207eff86c880a5aa293f1252c31aa28adda2\n", + " Stored in directory: /root/.cache/pip/wheels/1a/97/32/461f837398029ad76911109f07047fde1d7b661a147c7c56d1\n", + " Building wheel for pretrainedmodels (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for pretrainedmodels: filename=pretrainedmodels-0.7.4-py3-none-any.whl size=60944 sha256=074399a3b3b372eacba5cc2d25666e94f8e59e253fa4efd9387db579e8645d5d\n", + " Stored in directory: /root/.cache/pip/wheels/5f/5b/96/fd94bc35962d7c6b699e8814db545155ac91d2b95785e1b035\n", + "Successfully built efficientnet-pytorch antlr4-python3-runtime pretrainedmodels\n", + "Installing collected packages: antlr4-python3-runtime, aenum, typeshed-client, tensorboardX, rtree, omegaconf, munch, lightning-utilities, lightly_utils, kornia_rs, jsonargparse, cligj, click-plugins, affine, rasterio, hydra-core, fiona, torchmetrics, kornia, efficientnet-pytorch, bitsandbytes, pytorch-lightning, pretrainedmodels, segmentation-models-pytorch, lightning, lightly, torchgeo\n", + "Successfully installed aenum-3.1.15 affine-2.4.0 antlr4-python3-runtime-4.9.3 bitsandbytes-0.45.1 click-plugins-1.1.1 cligj-0.7.2 efficientnet-pytorch-0.7.1 fiona-1.10.1 hydra-core-1.3.2 jsonargparse-4.36.0 kornia-0.8.0 kornia_rs-0.1.8 lightly-1.5.18 lightly_utils-0.0.2 lightning-2.5.0.post0 lightning-utilities-0.11.9 munch-4.0.0 omegaconf-2.3.0 pretrainedmodels-0.7.4 pytorch-lightning-2.5.0.post0 rasterio-1.4.3 rtree-1.3.0 segmentation-models-pytorch-0.4.0 tensorboardX-2.6.2.2 torchgeo-0.6.2 torchmetrics-1.6.1 typeshed-client-2.7.0\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "pydevd_plugins" + ] + }, + "id": "000206bb647443049eb175fcbd4dd95d" + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "import sys\n", + "sys.path.append('/content/drive/MyDrive/Colab/torchgeo')" + ], + "metadata": { + "id": "NCiVqQctfi3F" + }, + "execution_count": 4, "outputs": [] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "id": "gQBpL3DTHh2v", - "outputId": "0f96c780-21d7-42fe-fc97-db77155f826c", + "outputId": "24eccf4b-03af-4b43-f375-c9d9f04999a4", "colab": { "base_uri": "https://localhost:8080/", "height": 383 @@ -26,13 +274,13 @@ "outputs": [ { "output_type": "error", - "ename": "ModuleNotFoundError", - "evalue": "No module named 'lightning'", + "ename": "ImportError", + "evalue": "cannot import name 'InstanceSegmentationTask' from 'torchgeo.trainers' 
(/usr/local/lib/python3.11/dist-packages/torchgeo/trainers/__init__.py)", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mlightning\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpytorch\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpl\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mDataLoader\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSubset\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtorchgeo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatasets\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mVHR10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtorchvision\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransforms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunctional\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mto_pil_image\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'lightning'", + "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunctional\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mtorchgeo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrainers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mInstanceSegmentationTask\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpatches\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpatches\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mImportError\u001b[0m: cannot import name 'InstanceSegmentationTask' from 'torchgeo.trainers' (/usr/local/lib/python3.11/dist-packages/torchgeo/trainers/__init__.py)", "", "\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n" ], @@ -48,8 +296,6 
@@ } ], "source": [ - "!pip install torch torchvision torchgeo lightning matplotlib\n", - "\n", "import torch\n", "import lightning.pytorch as pl\n", "from torch.utils.data import DataLoader, Subset\n", From 96648346ed403e062ad2a8cc1ad71a99d56002d3 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Wed, 5 Feb 2025 20:35:44 +0100 Subject: [PATCH 18/23] Update instance_segmentation.py --- torchgeo/trainers/instance_segmentation.py | 77 ++++++++++------------ 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/torchgeo/trainers/instance_segmentation.py b/torchgeo/trainers/instance_segmentation.py index c705bb912aa..7e1cefeb12d 100644 --- a/torchgeo/trainers/instance_segmentation.py +++ b/torchgeo/trainers/instance_segmentation.py @@ -11,10 +11,12 @@ from torchmetrics import MetricCollection from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights from torchvision.models.detection.faster_rcnn import FastRCNNPredictor -from .base import BaseTask +from torchgeo.trainers.base import BaseTask import matplotlib.pyplot as plt from matplotlib.figure import Figure -from ..datasets import RGBBandsMissingError, unbind_samples +from torchgeo.datasets import RGBBandsMissingError, unbind_samples +from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor +import numpy as np class InstanceSegmentationTask(BaseTask): """Instance Segmentation.""" @@ -66,7 +68,7 @@ def configure_models(self) -> None: if model == 'mask_rcnn': # Load the Mask R-CNN model with a ResNet50 backbone - self.model = maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT) + self.model = maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT, rpn_nms_thresh=0.5, box_nms_thresh=0.3) # Update the classification head to predict `num_classes` in_features = self.model.roi_heads.box_predictor.cls_score.in_features @@ -75,9 +77,10 @@ def configure_models(self) -> None: # Update the mask head for instance segmentation in_features_mask = self.model.roi_heads.mask_predictor.conv5_mask.in_channels - self.model.roi_heads.mask_predictor = nn.ConvTranspose2d( - in_features_mask, num_classes, kernel_size=2, stride=2 - ) + + hidden_layer = 256 + self.model.roi_heads.mask_predictor = MaskRCNNPredictor( + in_features_mask, hidden_layer, num_classes) else: raise ValueError( @@ -114,9 +117,7 @@ def training_step(self, batch: Any, batch_idx: int) -> Tensor: loss_dict = self.model(images, targets) loss = sum(loss for loss in loss_dict.values()) - print('\nTRAINING LOSS\n') - print(loss_dict, '\n\n') - print(loss) + print(f"\nTRAINING STEP LOSS: {loss.item()}") self.log('train_loss', loss, batch_size=len(images)) return loss @@ -134,20 +135,21 @@ def validation_step(self, batch: Any, batch_idx: int) -> None: batch_size = images.shape[0] outputs = self.model(images) - loss_dict = self.model(images, targets) # list of dictionaries - total_loss = sum(loss_item for loss_dict in loss_dict for loss_item in loss_dict.values() if loss_item.ndim == 0) + loss_dict_list = self.model(images, targets) # list of dictionaries + total_loss = sum( + sum(loss_item for loss_item in loss_dict.values() if loss_item.ndim == 0) + for loss_dict in loss_dict_list + ) for target in targets: target["masks"] = (target["masks"] > 0).to(torch.uint8) target["boxes"] = target["boxes"].to(torch.float32) target["labels"] = target["labels"].to(torch.int64) - - # Post-process the outputs to ensure masks are in the correct format + for output in outputs: if "masks" in output: output["masks"] = (output["masks"] > 
From 96648346ed403e062ad2a8cc1ad71a99d56002d3 Mon Sep 17 00:00:00 2001
From: Arianna Sole
Date: Wed, 5 Feb 2025 20:35:44 +0100
Subject: [PATCH 18/23] Update instance_segmentation.py

---
 torchgeo/trainers/instance_segmentation.py | 77 ++++++++++------------
 1 file changed, 36 insertions(+), 41 deletions(-)

diff --git a/torchgeo/trainers/instance_segmentation.py b/torchgeo/trainers/instance_segmentation.py
index c705bb912aa..7e1cefeb12d 100644
--- a/torchgeo/trainers/instance_segmentation.py
+++ b/torchgeo/trainers/instance_segmentation.py
@@ -11,10 +11,12 @@
 from torchmetrics import MetricCollection
 from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights
 from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
-from .base import BaseTask
+from torchgeo.trainers.base import BaseTask
 import matplotlib.pyplot as plt
 from matplotlib.figure import Figure
-from ..datasets import RGBBandsMissingError, unbind_samples
+from torchgeo.datasets import RGBBandsMissingError, unbind_samples
+from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
+import numpy as np
 
 class InstanceSegmentationTask(BaseTask):
     """Instance Segmentation."""
@@ -66,7 +68,7 @@ def configure_models(self) -> None:
 
         if model == 'mask_rcnn':
             # Load the Mask R-CNN model with a ResNet50 backbone
-            self.model = maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT)
+            self.model = maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT, rpn_nms_thresh=0.5, box_nms_thresh=0.3)
 
             # Update the classification head to predict `num_classes`
             in_features = self.model.roi_heads.box_predictor.cls_score.in_features
@@ -75,9 +77,10 @@ def configure_models(self) -> None:
 
             # Update the mask head for instance segmentation
             in_features_mask = self.model.roi_heads.mask_predictor.conv5_mask.in_channels
-            self.model.roi_heads.mask_predictor = nn.ConvTranspose2d(
-                in_features_mask, num_classes, kernel_size=2, stride=2
-            )
+
+            hidden_layer = 256
+            self.model.roi_heads.mask_predictor = MaskRCNNPredictor(
+                in_features_mask, hidden_layer, num_classes)
 
         else:
             raise ValueError(
@@ -114,9 +117,7 @@ def training_step(self, batch: Any, batch_idx: int) -> Tensor:
         loss_dict = self.model(images, targets)
         loss = sum(loss for loss in loss_dict.values())
 
-        print('\nTRAINING LOSS\n')
-        print(loss_dict, '\n\n')
-        print(loss)
+        print(f"\nTRAINING STEP LOSS: {loss.item()}")
 
         self.log('train_loss', loss, batch_size=len(images))
         return loss
@@ -134,20 +135,21 @@ def validation_step(self, batch: Any, batch_idx: int) -> None:
         batch_size = images.shape[0]
 
         outputs = self.model(images)
-        loss_dict = self.model(images, targets)  # list of dictionaries
-        total_loss = sum(loss_item for loss_dict in loss_dict for loss_item in loss_dict.values() if loss_item.ndim == 0)
+        loss_dict_list = self.model(images, targets)  # list of dictionaries
+        total_loss = sum(
+            sum(loss_item for loss_item in loss_dict.values() if loss_item.ndim == 0)
+            for loss_dict in loss_dict_list
+        )
 
         for target in targets:
             target["masks"] = (target["masks"] > 0).to(torch.uint8)
             target["boxes"] = target["boxes"].to(torch.float32)
             target["labels"] = target["labels"].to(torch.int64)
-
-        # Post-process the outputs to ensure masks are in the correct format
+
         for output in outputs:
             if "masks" in output:
                 output["masks"] = (output["masks"] > 0.5).squeeze(1).to(torch.uint8)
-
-        # Sum the losses
+
         self.log('val_loss', total_loss, batch_size=batch_size)
 
         metrics = self.val_metrics(outputs, targets)
@@ -197,8 +199,11 @@ def test_step(self, batch: Any, batch_idx: int) -> None:
         batch_size = images.shape[0]
 
         outputs = self.model(images)
-        loss_dict = self.model(images, targets)  # Compute all losses
-        total_loss = sum(loss_item for loss_dict in loss_dict for loss_item in loss_dict.values() if loss_item.ndim == 0)
+        loss_dict_list = self.model(images, targets)  # Compute all losses, list of dictionaries (one for every batch element)
+        total_loss = sum(
+            sum(loss_item for loss_item in loss_dict.values() if loss_item.ndim == 0)
+            for loss_dict in loss_dict_list
+        )
 
         for target in targets:
             target["masks"] = target["masks"].to(torch.uint8)
@@ -206,7 +211,8 @@ def test_step(self, batch: Any, batch_idx: int) -> None:
             target["labels"] = target["labels"].to(torch.int64)
 
         for output in outputs:
-            output["masks"] = (output["masks"] > 0.5).squeeze(1).to(torch.uint8)
+            if "masks" in output:
+                output["masks"] = (output["masks"] > 0.5).squeeze(1).to(torch.uint8)
 
         self.log('test_loss', total_loss, batch_size=batch_size)
 
@@ -219,33 +225,22 @@ def test_step(self, batch: Any, batch_idx: int) -> None:
             value = value.to(torch.float32).mean()
             scalar_metrics[key] = value
 
-        self.log_dict(scalar_metrics, batch_size=batch_size)
-
-        print('\nTESTING LOSS\n')
-        print(loss_dict, '\n\n')
-        print(total_loss)
+        self.log_dict(scalar_metrics, batch_size=batch_size)
 
     def predict_step(self, batch: Any, batch_idx: int) -> Any:
-        """Perform inference on a batch of images.
-
-        Args:
-            batch: A batch of images.
-
-        Returns:
-            Predicted masks and bounding boxes for the batch.
-        """
+        """Perform inference on a batch of images."""
         self.model.eval()
-        images = batch['image']
-        outputs = self.model(images)
-
-        for output in outputs:
-            output["masks"] = (output["masks"] > 0.5).to(torch.uint8)
-        return outputs
-
-
-
-
+        images = batch['image']
+        with torch.no_grad():
+            outputs = self.model(images)
+        for output in outputs:
+            keep = output["scores"] > 0.05
+            output["boxes"] = output["boxes"][keep]
+            output["labels"] = output["labels"][keep]
+            output["scores"] = output["scores"][keep]
+            output["masks"] = (output["masks"] > 0.5).squeeze(1).to(torch.uint8)[keep]
+        return outputs
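The reworked predict_step above binarizes the soft masks at 0.5 and then drops low-confidence detections with a 0.05 score cut-off. The same filtering in isolation, on toy tensors invented for illustration (only the two thresholds mirror the diff):

    import torch

    # Fake single-image Mask R-CNN output: two detections, one low-confidence.
    output = {
        'boxes': torch.tensor([[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 20.0, 20.0]]),
        'labels': torch.tensor([1, 2]),
        'scores': torch.tensor([0.90, 0.02]),
        'masks': torch.rand(2, 1, 32, 32),  # (N, 1, H, W) soft masks in [0, 1]
    }

    # Keep only detections scoring above 0.05; binarize masks at 0.5.
    keep = output['scores'] > 0.05
    output['boxes'] = output['boxes'][keep]
    output['labels'] = output['labels'][keep]
    output['scores'] = output['scores'][keep]
    output['masks'] = (output['masks'] > 0.5).squeeze(1).to(torch.uint8)[keep]

    print(output['boxes'].shape, output['masks'].shape)
    # torch.Size([1, 4]) torch.Size([1, 32, 32]) -- the 0.02-score detection is dropped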
"d54832c7-2ae2-4d3b-c240-09532b81b90b" - }, - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.5.1+cu121)\n", - "Requirement already satisfied: torchvision in /usr/local/lib/python3.11/dist-packages (0.20.1+cu121)\n", - "Collecting torchgeo\n", - " Downloading torchgeo-0.6.2-py3-none-any.whl.metadata (19 kB)\n", - "Collecting lightning\n", - " Downloading lightning-2.5.0.post0-py3-none-any.whl.metadata (40 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.4/40.4 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (3.10.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch) (3.17.0)\n", - "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.11/dist-packages (from torch) (4.12.2)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.5)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch) (2024.10.0)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch) (9.1.0.70)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.3.1)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch) (11.0.2.54)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n", - "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.0)\n", - "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.8.61)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from torchvision) (1.26.4)\n", - 
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.11/dist-packages (from torchvision) (11.1.0)\n", - "Requirement already satisfied: einops>=0.3 in /usr/local/lib/python3.11/dist-packages (from torchgeo) (0.8.0)\n", - "Collecting fiona>=1.8.21 (from torchgeo)\n", - " Downloading fiona-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (56 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.6/56.6 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting kornia>=0.7.3 (from torchgeo)\n", - " Downloading kornia-0.8.0-py2.py3-none-any.whl.metadata (17 kB)\n", - "Collecting lightly!=1.4.26,>=1.4.5 (from torchgeo)\n", - " Downloading lightly-1.5.18-py3-none-any.whl.metadata (36 kB)\n", - "Requirement already satisfied: pandas>=1.3.3 in /usr/local/lib/python3.11/dist-packages (from torchgeo) (2.2.2)\n", - "Requirement already satisfied: pyproj>=3.3 in /usr/local/lib/python3.11/dist-packages (from torchgeo) (3.7.0)\n", - "Collecting rasterio!=1.4.0,!=1.4.1,!=1.4.2,>=1.3 (from torchgeo)\n", - " Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)\n", - "Collecting rtree>=1 (from torchgeo)\n", - " Downloading Rtree-1.3.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.1 kB)\n", - "Collecting segmentation-models-pytorch>=0.2 (from torchgeo)\n", - " Downloading segmentation_models_pytorch-0.4.0-py3-none-any.whl.metadata (32 kB)\n", - "Requirement already satisfied: shapely>=1.8 in /usr/local/lib/python3.11/dist-packages (from torchgeo) (2.0.6)\n", - "Requirement already satisfied: timm>=0.4.12 in /usr/local/lib/python3.11/dist-packages (from torchgeo) (1.0.14)\n", - "Collecting torchmetrics>=0.10 (from torchgeo)\n", - " Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)\n", - "Requirement already satisfied: PyYAML<8.0,>=5.4 in /usr/local/lib/python3.11/dist-packages (from lightning) (6.0.2)\n", - "Collecting lightning-utilities<2.0,>=0.10.0 (from lightning)\n", - " Downloading lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)\n", - "Requirement already satisfied: packaging<25.0,>=20.0 in /usr/local/lib/python3.11/dist-packages (from lightning) (24.2)\n", - "Requirement already satisfied: tqdm<6.0,>=4.57.0 in /usr/local/lib/python3.11/dist-packages (from lightning) (4.67.1)\n", - "Collecting pytorch-lightning (from lightning)\n", - " Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl.metadata (21 kB)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.3.1)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (4.55.6)\n", - "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.4.8)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (3.2.1)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (2.8.2)\n", - "Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.11/dist-packages (from fiona>=1.8.21->torchgeo) (24.3.0)\n", - "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from 
fiona>=1.8.21->torchgeo) (2024.12.14)\n", - "Requirement already satisfied: click~=8.0 in /usr/local/lib/python3.11/dist-packages (from fiona>=1.8.21->torchgeo) (8.1.8)\n", - "Collecting click-plugins>=1.0 (from fiona>=1.8.21->torchgeo)\n", - " Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)\n", - "Collecting cligj>=0.5 (from fiona>=1.8.21->torchgeo)\n", - " Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)\n", - "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]<2026.0,>=2022.5.0->lightning) (3.11.11)\n", - "Collecting kornia_rs>=0.1.0 (from kornia>=0.7.3->torchgeo)\n", - " Downloading kornia_rs-0.1.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)\n", - "Collecting hydra-core>=1.0.0 (from lightly!=1.4.26,>=1.4.5->torchgeo)\n", - " Downloading hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB)\n", - "Collecting lightly_utils~=0.0.0 (from lightly!=1.4.26,>=1.4.5->torchgeo)\n", - " Downloading lightly_utils-0.0.2-py3-none-any.whl.metadata (1.4 kB)\n", - "Requirement already satisfied: requests>=2.23.0 in /usr/local/lib/python3.11/dist-packages (from lightly!=1.4.26,>=1.4.5->torchgeo) (2.32.3)\n", - "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.11/dist-packages (from lightly!=1.4.26,>=1.4.5->torchgeo) (1.17.0)\n", - "Requirement already satisfied: pydantic>=1.10.5 in /usr/local/lib/python3.11/dist-packages (from lightly!=1.4.26,>=1.4.5->torchgeo) (2.10.6)\n", - "Requirement already satisfied: urllib3>=1.25.3 in /usr/local/lib/python3.11/dist-packages (from lightly!=1.4.26,>=1.4.5->torchgeo) (2.3.0)\n", - "Collecting aenum>=3.1.11 (from lightly!=1.4.26,>=1.4.5->torchgeo)\n", - " Downloading aenum-3.1.15-py3-none-any.whl.metadata (3.7 kB)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from lightning-utilities<2.0,>=0.10.0->lightning) (75.1.0)\n", - "Collecting jsonargparse<5.0,>=4.27.7 (from jsonargparse[signatures]<5.0,>=4.27.7; extra == \"pytorch-extra\"->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo)\n", - " Downloading jsonargparse-4.36.0-py3-none-any.whl.metadata (12 kB)\n", - "Collecting omegaconf<3.0,>=2.2.3 (from lightning[pytorch-extra]!=2.3.*,>=2->torchgeo)\n", - " Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)\n", - "Requirement already satisfied: rich<14.0,>=12.3.0 in /usr/local/lib/python3.11/dist-packages (from lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (13.9.4)\n", - "Collecting tensorboardX<3.0,>=2.2 (from lightning[pytorch-extra]!=2.3.*,>=2->torchgeo)\n", - " Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl.metadata (5.8 kB)\n", - "Collecting bitsandbytes<1.0,>=0.44.0 (from lightning[pytorch-extra]!=2.3.*,>=2->torchgeo)\n", - " Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.3.3->torchgeo) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.3.3->torchgeo) (2025.1)\n", - "Collecting affine (from rasterio!=1.4.0,!=1.4.1,!=1.4.2,>=1.3->torchgeo)\n", - " Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)\n", - "Collecting efficientnet-pytorch>=0.6.1 (from segmentation-models-pytorch>=0.2->torchgeo)\n", - " Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)\n", - " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: huggingface-hub>=0.24 in /usr/local/lib/python3.11/dist-packages (from segmentation-models-pytorch>=0.2->torchgeo) (0.27.1)\n", - "Collecting pretrainedmodels>=0.7.1 (from segmentation-models-pytorch>=0.2->torchgeo)\n", - " Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.8/58.8 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: safetensors in /usr/local/lib/python3.11/dist-packages (from timm>=0.4.12->torchgeo) (0.5.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (2.4.4)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (1.3.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (1.5.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (6.1.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (0.2.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning) (1.18.3)\n", - "Collecting antlr4-python3-runtime==4.9.* (from hydra-core>=1.0.0->lightly!=1.4.26,>=1.4.5->torchgeo)\n", - " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: docstring-parser>=0.15 in /usr/local/lib/python3.11/dist-packages (from jsonargparse[signatures]<5.0,>=4.27.7; extra == \"pytorch-extra\"->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (0.16)\n", - "Collecting typeshed-client>=2.1.0 (from jsonargparse[signatures]<5.0,>=4.27.7; extra == \"pytorch-extra\"->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo)\n", - " Downloading typeshed_client-2.7.0-py3-none-any.whl.metadata (7.9 kB)\n", - "Collecting munch (from pretrainedmodels>=0.7.1->segmentation-models-pytorch>=0.2->torchgeo)\n", - " Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=1.10.5->lightly!=1.4.26,>=1.4.5->torchgeo) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=1.10.5->lightly!=1.4.26,>=1.4.5->torchgeo) (2.27.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.23.0->lightly!=1.4.26,>=1.4.5->torchgeo) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.23.0->lightly!=1.4.26,>=1.4.5->torchgeo) (3.10)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14.0,>=12.3.0->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14.0,>=12.3.0->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (2.18.0)\n", - "Requirement already satisfied: protobuf>=3.20 in /usr/local/lib/python3.11/dist-packages (from tensorboardX<3.0,>=2.2->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (4.25.6)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14.0,>=12.3.0->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (0.1.2)\n", - "Requirement already satisfied: importlib-resources>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from typeshed-client>=2.1.0->jsonargparse[signatures]<5.0,>=4.27.7; extra == \"pytorch-extra\"->lightning[pytorch-extra]!=2.3.*,>=2->torchgeo) (6.5.2)\n", - "Downloading torchgeo-0.6.2-py3-none-any.whl (454 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.7/454.7 kB\u001b[0m \u001b[31m26.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading lightning-2.5.0.post0-py3-none-any.whl (815 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m815.2/815.2 kB\u001b[0m \u001b[31m45.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading fiona-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m44.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading kornia-0.8.0-py2.py3-none-any.whl (1.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m66.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading lightly-1.5.18-py3-none-any.whl (849 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m849.0/849.0 kB\u001b[0m \u001b[31m58.4 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading lightning_utilities-0.11.9-py3-none-any.whl (28 kB)\n", - "Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl (819 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m819.3/819.3 kB\u001b[0m \u001b[31m55.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m22.2/22.2 MB\u001b[0m \u001b[31m83.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading Rtree-1.3.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (543 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m543.2/543.2 kB\u001b[0m \u001b[31m45.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading segmentation_models_pytorch-0.4.0-py3-none-any.whl (121 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.3/121.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading torchmetrics-1.6.1-py3-none-any.whl (927 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m927.3/927.3 kB\u001b[0m \u001b[31m61.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading aenum-3.1.15-py3-none-any.whl (137 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.6/137.6 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.7/69.7 MB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)\n", - "Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)\n", - "Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.5/154.5 kB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading jsonargparse-4.36.0-py3-none-any.whl (214 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m214.5/214.5 kB\u001b[0m \u001b[31m19.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading kornia_rs-0.1.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m80.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading lightly_utils-0.0.2-py3-none-any.whl (6.4 kB)\n", - "Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.7/101.7 kB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading affine-2.4.0-py3-none-any.whl (15 kB)\n", - "Downloading typeshed_client-2.7.0-py3-none-any.whl (624 kB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m624.4/624.4 kB\u001b[0m \u001b[31m49.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading munch-4.0.0-py2.py3-none-any.whl (9.9 kB)\n", - "Building wheels for collected packages: efficientnet-pytorch, antlr4-python3-runtime, pretrainedmodels\n", - " Building wheel for efficientnet-pytorch (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16424 sha256=90f30150a6591e8e0de5f7170e32716068df4c4732c5295b8abe4d3e45481de9\n", - " Stored in directory: /root/.cache/pip/wheels/8b/6f/9b/231a832f811ab6ebb1b32455b177ffc6b8b1cd8de19de70c09\n", - " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144555 sha256=307a00d9480659ee147229d4122e207eff86c880a5aa293f1252c31aa28adda2\n", - " Stored in directory: /root/.cache/pip/wheels/1a/97/32/461f837398029ad76911109f07047fde1d7b661a147c7c56d1\n", - " Building wheel for pretrainedmodels (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for pretrainedmodels: filename=pretrainedmodels-0.7.4-py3-none-any.whl size=60944 sha256=074399a3b3b372eacba5cc2d25666e94f8e59e253fa4efd9387db579e8645d5d\n", - " Stored in directory: /root/.cache/pip/wheels/5f/5b/96/fd94bc35962d7c6b699e8814db545155ac91d2b95785e1b035\n", - "Successfully built efficientnet-pytorch antlr4-python3-runtime pretrainedmodels\n", - "Installing collected packages: antlr4-python3-runtime, aenum, typeshed-client, tensorboardX, rtree, omegaconf, munch, lightning-utilities, lightly_utils, kornia_rs, jsonargparse, cligj, click-plugins, affine, rasterio, hydra-core, fiona, torchmetrics, kornia, efficientnet-pytorch, bitsandbytes, pytorch-lightning, pretrainedmodels, segmentation-models-pytorch, lightning, lightly, torchgeo\n", - "Successfully installed aenum-3.1.15 affine-2.4.0 antlr4-python3-runtime-4.9.3 bitsandbytes-0.45.1 click-plugins-1.1.1 cligj-0.7.2 efficientnet-pytorch-0.7.1 fiona-1.10.1 hydra-core-1.3.2 jsonargparse-4.36.0 kornia-0.8.0 kornia_rs-0.1.8 lightly-1.5.18 lightly_utils-0.0.2 lightning-2.5.0.post0 lightning-utilities-0.11.9 munch-4.0.0 omegaconf-2.3.0 pretrainedmodels-0.7.4 pytorch-lightning-2.5.0.post0 rasterio-1.4.3 rtree-1.3.0 segmentation-models-pytorch-0.4.0 tensorboardX-2.6.2.2 torchgeo-0.6.2 torchmetrics-1.6.1 typeshed-client-2.7.0\n" - ] - }, - { - "output_type": "display_data", - "data": { - "application/vnd.colab-display-data+json": { - "pip_warning": { - "packages": [ - "pydevd_plugins" - ] - }, - "id": "000206bb647443049eb175fcbd4dd95d" - } - }, - "metadata": {} - } - ] - }, - { - "cell_type": "code", - "source": [ - "import sys\n", - "sys.path.append('/content/drive/MyDrive/Colab/torchgeo')" - ], - "metadata": { - "id": "NCiVqQctfi3F" - }, - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "gQBpL3DTHh2v", - "outputId": "24eccf4b-03af-4b43-f375-c9d9f04999a4", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 383 - } - }, - "outputs": [ - { - "output_type": "error", - "ename": "ImportError", - "evalue": "cannot import name 'InstanceSegmentationTask' from 'torchgeo.trainers' (/usr/local/lib/python3.11/dist-packages/torchgeo/trainers/__init__.py)", - "traceback": [ - 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunctional\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mtorchgeo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrainers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mInstanceSegmentationTask\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpatches\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpatches\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mImportError\u001b[0m: cannot import name 'InstanceSegmentationTask' from 'torchgeo.trainers' (/usr/local/lib/python3.11/dist-packages/torchgeo/trainers/__init__.py)", - "", - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n" - ], - "errorDetails": { - "actions": [ - { - "action": "open_url", - "actionText": "Open Examples", - "url": "/notebooks/snippets/importing_libraries.ipynb" - } - ] - } - } - ], - "source": [ - "import torch\n", - "import lightning.pytorch as pl\n", - "from torch.utils.data import DataLoader, Subset\n", - "from torchgeo.datasets import VHR10\n", - "from torchvision.transforms.functional import to_pil_image\n", - "from matplotlib.patches import Rectangle\n", - "import matplotlib.pyplot as plt\n", - "import torch.nn.functional as F\n", - "from torchgeo.trainers import InstanceSegmentationTask\n", - "import matplotlib.patches as patches\n", - "import numpy as np\n", - "\n", - "\n", - "def collate_fn(batch):\n", - " \"\"\"Custom collate function for DataLoader.\"\"\"\n", - " max_height = max(sample['image'].shape[1] for sample in batch)\n", - " max_width = max(sample['image'].shape[2] for sample in batch)\n", - "\n", - " images = torch.stack([\n", - " F.pad(sample['image'], (0, max_width - sample['image'].shape[2], 0, max_height - sample['image'].shape[1]))\n", - " for sample in batch\n", - " ])\n", - "\n", - " targets = [\n", - " {\n", - " \"labels\": sample[\"labels\"].to(torch.int64),\n", - " \"boxes\": sample[\"boxes\"].to(torch.float32),\n", - " \"masks\": F.pad(\n", - " sample[\"masks\"],\n", - " (0, max_width - sample[\"masks\"].shape[2], 0, max_height - sample[\"masks\"].shape[1]),\n", - " ).to(torch.uint8),\n", - " }\n", - " for sample in batch\n", - " ]\n", - "\n", - " return {\"image\": images, 
\"target\": targets}\n", - "\n", - "def visualize_predictions(image, predictions, targets):\n", - " \"\"\"Visualize predictions and ground truth.\"\"\"\n", - " image = to_pil_image(image)\n", - "\n", - " fig, ax = plt.subplots(1, 1, figsize=(10, 10))\n", - " ax.imshow(image)\n", - "\n", - " # Predictions\n", - " for box, label in zip(predictions['boxes'], predictions['labels']):\n", - " x1, y1, x2, y2 = box\n", - " rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none')\n", - " ax.add_patch(rect)\n", - " ax.text(x1, y1, f\"Pred: {label.item()}\", color='red', fontsize=12)\n", - "\n", - " # Ground truth\n", - " for box, label in zip(targets['boxes'], targets['labels']):\n", - " x1, y1, x2, y2 = box\n", - " rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='blue', facecolor='none')\n", - " ax.add_patch(rect)\n", - " ax.text(x1, y1, f\"GT: {label.item()}\", color='blue', fontsize=12)\n", - "\n", - " plt.show()\n", - "\n", - "def plot_losses(train_losses, val_losses):\n", - " \"\"\"Plot training and validation losses over epochs.\"\"\"\n", - " plt.figure(figsize=(10, 5))\n", - " plt.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss', marker='o')\n", - " plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss', marker='s')\n", - " plt.xlabel('Epochs')\n", - " plt.ylabel('Loss')\n", - " plt.title('Training and Validation Loss Over Epochs')\n", - " plt.legend()\n", - " plt.grid()\n", - " plt.show()\n", - "\n", - "# Initialize VHR-10 dataset\n", - "train_dataset = VHR10(root=\"data\", split=\"positive\", transforms=None, download=True)\n", - "val_dataset = VHR10(root=\"data\", split=\"positive\", transforms=None)\n", - "\n", - "# Subset for quick experimentation (adjust N as needed)\n", - "N = 100\n", - "train_subset = Subset(train_dataset, list(range(N)))\n", - "val_subset = Subset(val_dataset, list(range(N)))\n", - "\n", - "if __name__ == '__main__':\n", - " import multiprocessing\n", - " multiprocessing.set_start_method('spawn', force=True)\n", - "\n", - " train_loader = DataLoader(train_subset, batch_size=8, shuffle=True, num_workers=1, collate_fn=collate_fn)\n", - " val_loader = DataLoader(val_subset, batch_size=8, shuffle=False, num_workers=1, collate_fn=collate_fn)\n", - "\n", - " # Trainer setup\n", - " trainer = pl.Trainer(\n", - " max_epochs=5,\n", - " accelerator=\"gpu\" if torch.cuda.is_available() else \"cpu\",\n", - " devices=1\n", - " )\n", - "\n", - " task = InstanceSegmentationTask(\n", - " model=\"mask_rcnn\",\n", - " backbone=\"resnet50\",\n", - " weights=\"imagenet\", # Pretrained on ImageNet\n", - " num_classes=11, # VHR-10 has 10 classes + 1 background\n", - " lr=1e-3,\n", - " freeze_backbone=False\n", - " )\n", - "\n", - " print('\\nSTART TRAINING\\n')\n", - " # trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader)\n", - " train_losses, val_losses = [], []\n", - " for epoch in range(5):\n", - " trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader)\n", - " train_loss = task.trainer.callback_metrics.get(\"train_loss\")\n", - " val_loss = task.trainer.callback_metrics.get(\"val_loss\")\n", - " if train_loss is not None:\n", - " train_losses.append(train_loss.item())\n", - " if val_loss is not None:\n", - " val_losses.append(val_loss.item())\n", - "\n", - " plot_losses(train_losses, val_losses)\n", - "\n", - " #trainer.test(task, dataloaders=val_loader)\n", - "\n", - " # Inference and Visualization\n", - " sample = train_dataset[1]\n", 
- " image = sample['image'].unsqueeze(0)\n", - " predictions = task.predict_step({\"image\": image}, batch_idx=0)\n", - " visualize_predictions(image[0], predictions[0], sample)\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.12.0" - }, - "colab": { - "provenance": [], - "gpuType": "T4" - }, - "accelerator": "GPU" - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file From 3c8630641be307214daae9a0a0c4691b290629bb Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Wed, 5 Feb 2025 20:39:24 +0100 Subject: [PATCH 20/23] Delete test_trainer_instancesegmentation.py --- test_trainer_instancesegmentation.py | 123 --------------------------- 1 file changed, 123 deletions(-) delete mode 100644 test_trainer_instancesegmentation.py diff --git a/test_trainer_instancesegmentation.py b/test_trainer_instancesegmentation.py deleted file mode 100644 index 2073b3e34c4..00000000000 --- a/test_trainer_instancesegmentation.py +++ /dev/null @@ -1,123 +0,0 @@ -import torch -import lightning.pytorch as pl -from torch.utils.data import DataLoader, Subset -from torchgeo.datasets import VHR10 -from torchvision.transforms.functional import to_pil_image -from matplotlib.patches import Rectangle -import matplotlib.pyplot as plt -import torch.nn.functional as F -from torchgeo.trainers import InstanceSegmentationTask - -def collate_fn(batch): - """Custom collate function for DataLoader.""" - max_height = max(sample['image'].shape[1] for sample in batch) - max_width = max(sample['image'].shape[2] for sample in batch) - - images = torch.stack([ - F.pad(sample['image'], (0, max_width - sample['image'].shape[2], 0, max_height - sample['image'].shape[1])) - for sample in batch - ]) - - targets = [ - { - "labels": sample["labels"].to(torch.int64), - "boxes": sample["boxes"].to(torch.float32), - "masks": F.pad( - sample["masks"], - (0, max_width - sample["masks"].shape[2], 0, max_height - sample["masks"].shape[1]), - ).to(torch.uint8), - } - for sample in batch - ] - - return {"image": images, "target": targets} - -def visualize_predictions(image, predictions, targets): - """Visualize predictions and ground truth.""" - image = to_pil_image(image) - - fig, ax = plt.subplots(1, 1, figsize=(10, 10)) - ax.imshow(image) - - # Predictions - for box, label in zip(predictions['boxes'], predictions['labels']): - x1, y1, x2, y2 = box - rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none') - ax.add_patch(rect) - ax.text(x1, y1, f"Pred: {label.item()}", color='red', fontsize=12) - - # Ground truth - for box, label in zip(targets['boxes'], targets['labels']): - x1, y1, x2, y2 = box - rect = Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='blue', facecolor='none') - ax.add_patch(rect) - ax.text(x1, y1, f"GT: {label.item()}", color='blue', fontsize=12) - - plt.show() - -def plot_losses(train_losses, val_losses): - """Plot training and validation losses over epochs.""" - plt.figure(figsize=(10, 5)) - plt.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss', marker='o') - plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss', marker='s') - plt.xlabel('Epochs') - plt.ylabel('Loss') - plt.title('Training and Validation Loss Over Epochs') - plt.legend() - plt.grid() - plt.show() - -# Initialize VHR-10 dataset -train_dataset = VHR10(root="data", split="positive", transforms=None, download=True) -val_dataset = 
VHR10(root="data", split="positive", transforms=None) - -# Subset for quick experimentation (adjust N as needed) -N = 100 -train_subset = Subset(train_dataset, list(range(N))) -val_subset = Subset(val_dataset, list(range(N))) - - -if __name__ == '__main__': - import multiprocessing - multiprocessing.set_start_method('spawn', force=True) - - train_loader = DataLoader(train_subset, batch_size=8, shuffle=True, num_workers=1, collate_fn=collate_fn) - val_loader = DataLoader(val_subset, batch_size=8, shuffle=False, num_workers=1, collate_fn=collate_fn) - - # Trainer setup - trainer = pl.Trainer( - max_epochs=5, - accelerator="gpu" if torch.cuda.is_available() else "cpu", - devices=1 - ) - - task = InstanceSegmentationTask( - model="mask_rcnn", - backbone="resnet50", - weights="imagenet", # Pretrained on ImageNet - num_classes=11, # VHR-10 has 10 classes + 1 background - lr=1e-3, - freeze_backbone=False - ) - - print('\nSTART TRAINING\n') - # trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader) - train_losses, val_losses = [], [] - for epoch in range(5): - trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader) - train_loss = task.trainer.callback_metrics.get("train_loss") - val_loss = task.trainer.callback_metrics.get("val_loss") - if train_loss is not None: - train_losses.append(train_loss.item()) - if val_loss is not None: - val_losses.append(val_loss.item()) - - plot_losses(train_losses, val_losses) - - #trainer.test(task, dataloaders=val_loader) - - # Inference and Visualization - sample = train_dataset[1] - image = sample['image'].unsqueeze(0) - predictions = task.predict_step({"image": image}, batch_idx=0) - visualize_predictions(image[0], predictions[0], sample) From 7ec3930c73ca17406fc3a812938982204e925cef Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Wed, 5 Feb 2025 20:41:39 +0100 Subject: [PATCH 21/23] Update and rename test_instancesegmentation.py to test_instance_segmentation.py --- ...test_instancesegmentation.py => test_instance_segmentation.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/trainers/{test_instancesegmentation.py => test_instance_segmentation.py} (100%) diff --git a/tests/trainers/test_instancesegmentation.py b/tests/trainers/test_instance_segmentation.py similarity index 100% rename from tests/trainers/test_instancesegmentation.py rename to tests/trainers/test_instance_segmentation.py From 927f7fc6c46818d77937bcf9e823bf73af8ad071 Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Wed, 5 Feb 2025 20:45:06 +0100 Subject: [PATCH 22/23] Update instance_segmentation.py --- torchgeo/trainers/instance_segmentation.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/torchgeo/trainers/instance_segmentation.py b/torchgeo/trainers/instance_segmentation.py index 7e1cefeb12d..b76daa22a39 100644 --- a/torchgeo/trainers/instance_segmentation.py +++ b/torchgeo/trainers/instance_segmentation.py @@ -49,13 +49,7 @@ def __init__( .. versionadded:: 0.7 """ self.weights = weights - super().__init__() - self.save_hyperparameters() - self.model = None - self.validation_outputs = [] - self.test_outputs = [] - self.configure_models() - self.configure_metrics() + super().__init__() def configure_models(self) -> None: """Initialize the model. 
@@ -116,9 +110,6 @@ def training_step(self, batch: Any, batch_idx: int) -> Tensor: images, targets = batch['image'], batch['target'] loss_dict = self.model(images, targets) loss = sum(loss for loss in loss_dict.values()) - - print(f"\nTRAINING STEP LOSS: {loss.item()}") - self.log('train_loss', loss, batch_size=len(images)) return loss @@ -163,7 +154,6 @@ def validation_step(self, batch: Any, batch_idx: int) -> None: self.log_dict(scalar_metrics, batch_size=batch_size) - # check if ( batch_idx < 10 and hasattr(self.trainer, 'datamodule') From 4f1cecfb8dff86d3814d72110c76f03e26f9f52f Mon Sep 17 00:00:00 2001 From: Arianna Sole Date: Wed, 5 Feb 2025 20:49:18 +0100 Subject: [PATCH 23/23] Update test_instance_segmentation.py --- tests/trainers/test_instance_segmentation.py | 27 -------------------- 1 file changed, 27 deletions(-) diff --git a/tests/trainers/test_instance_segmentation.py b/tests/trainers/test_instance_segmentation.py index 99d6a118ac4..527d6960b99 100644 --- a/tests/trainers/test_instance_segmentation.py +++ b/tests/trainers/test_instance_segmentation.py @@ -86,22 +86,9 @@ class TestSemanticSegmentationTask: def test_trainer( self, monkeypatch: MonkeyPatch, name: str, fast_dev_run: bool ) -> None: - match name: - case 'chabud' | 'cabuar': - pytest.importorskip('h5py', minversion='3.6') - case 'ftw': - pytest.importorskip('pyarrow') - case 'landcoverai': - sha256 = ( - 'ecec8e871faf1bbd8ca525ca95ddc1c1f5213f40afb94599884bd85f990ebd6b' - ) - monkeypatch.setattr(LandCoverAI, 'sha256', sha256) config = os.path.join('tests', 'conf', name + '.yaml') - monkeypatch.setattr(smp, 'Unet', create_model) - monkeypatch.setattr(smp, 'DeepLabV3Plus', create_model) - args = [ '--config', config, @@ -241,17 +228,3 @@ def test_freeze_backbone(self, model_name: str, backbone: str) -> None: for param in model.model.segmentation_head.parameters() ] ) - - # @pytest.mark.parametrize('model_name', ['unet', 'deeplabv3+']) - # def test_freeze_decoder(self, model_name: str) -> None: - # model = InstanceSegmentationTask(model=model_name, freeze_decoder=True) - # assert all( - # [param.requires_grad is False for param in model.model.decoder.parameters()] - # ) - # assert all([param.requires_grad for param in model.model.encoder.parameters()]) - # assert all( - # [ - # param.requires_grad - # for param in model.model.segmentation_head.parameters() - # ] - # )
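With the full series applied, the task is driven like any other TorchGeo trainer. A minimal smoke-test sketch mirroring the deleted test_trainer script (the VHR-10 class count comes from that script; fast_dev_run and the elided dataloaders are assumptions, not part of the series):

    import lightning.pytorch as pl
    from torchgeo.trainers import InstanceSegmentationTask

    task = InstanceSegmentationTask(
        model='mask_rcnn',
        backbone='resnet50',
        num_classes=11,  # VHR-10: 10 foreground classes + background
        lr=1e-3,
        freeze_backbone=False,
    )

    # fast_dev_run runs a single train/val batch as a quick sanity check.
    trainer = pl.Trainer(max_epochs=1, fast_dev_run=True)
    # trainer.fit(task, train_dataloaders=..., val_dataloaders=...)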