Merge pull request #60 from cedadev/test_fixes
V1.3.2 with release notes
dwest77a authored Mar 3, 2025
2 parents 92fba9a + 0c24ee6 commit d755788
Showing 34 changed files with 1,189 additions and 471 deletions.
16 changes: 16 additions & 0 deletions docs/source/core/interactive.rst
@@ -231,6 +231,22 @@ A project can also be transferred between two group instances using the following

Developer note (05/02/25): The transfer project mechanism is currently in alpha deployment, and is known to exhibit inconsistent behaviour when trying to transfer a project to a new uninitialised group. This is an ongoing issue.

6. Completion of a group of projects
------------------------------------

As of padocc v1.3.2, the Group operator includes a ``complete_group`` method, which extracts all created products from all projects in a group. This replaces the previous approach, which involved running the validation phase in a specific way. The method requires a **completion directory** into which all products are copied. Project codes and revisions are applied to the copied products at this stage, whereas inside the pipeline most products are not referred to by their project codes.

.. code:: python

    >>> # With my_group initialised with verbose logging enabled
    >>> my_group.complete_group('my_home_dir/completed_datasets')
    INFO [group-operation]: Verifying completion directory exists
    INFO [group-operation]: Completing 2/2 projects for my-group
    INFO [group-operation]: Updated new status: complete - Success
    INFO [group-operation]: Updated new status: complete - Success

You can then check inside the ``completed_datasets`` directory to verify that all products are present. For each kerchunk/zarr dataset you will also see a ``.nca`` CFA dataset file, which follows the Climate and Forecast Aggregation (CFA) conventions (see https://cedadev.github.io/CFAPyX/ for more details). These files can be opened locally with Xarray.

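For illustration, here is a minimal sketch of opening one of these CFA files with Xarray, assuming the CFAPyX package is installed to provide the ``CFA`` backend engine; the file path shown is hypothetical:

.. code:: python

    >>> import xarray as xr
    >>> # Requires CFAPyX, which registers the 'CFA' engine with Xarray.
    >>> ds = xr.open_dataset(
    ...     'my_home_dir/completed_datasets/my-project.nca',  # hypothetical path
    ...     engine='CFA'
    ... )
    >>> ds  # a standard xarray.Dataset, loaded lazily via the aggregation
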
Using the ProjectOperation class
================================
1 change: 0 additions & 1 deletion padocc/__init__.py
@@ -4,5 +4,4 @@

 from .core import ProjectOperation
 from .groups import GroupOperation
-
 from .phases import phase_map
3 changes: 2 additions & 1 deletion padocc/cli.py
@@ -6,8 +6,9 @@

 import argparse
 
-from padocc.core.utils import BypassSwitch, get_attribute
+from padocc import GroupOperation, phase_map
+from padocc.core.utils import BypassSwitch, get_attribute
 
 
 def get_args():
     parser = argparse.ArgumentParser(description='Run a pipeline step for a group of datasets')
Expand Down
15 changes: 3 additions & 12 deletions padocc/core/__init__.py
@@ -2,15 +2,6 @@
 __contact__ = "[email protected]"
 __copyright__ = "Copyright 2024 United Kingdom Research and Innovation"
 
-from .logs import (
-    init_logger,
-    reset_file_handler,
-    FalseLogger,
-    LoggedOperation
-)
-
-from .utils import (
-    BypassSwitch
-)
-
-from .project import ProjectOperation
+from .logs import FalseLogger, LoggedOperation, init_logger, reset_file_handler
+from .project import ProjectOperation
+from .utils import BypassSwitch
19 changes: 15 additions & 4 deletions padocc/core/errors.py
@@ -3,20 +3,20 @@
 __copyright__ = "Copyright 2024 United Kingdom Research and Innovation"
 
 import json
-import os
 import logging
+import os
 import traceback
 
 from typing import Optional, Union
 
 
 def error_handler(
         err : Exception,
         logger: logging.Logger,
         phase: str,
         subset_bypass: bool = False,
         jobid: Optional[str] = None,
         status_fh: Optional[object] = None
-    ):
+    ) -> str:
 
     """
     This function should be used at top-level loops over project codes ONLY -
@@ -25,6 +25,17 @@ def error_handler(
     1. Single slurm job failed - raise Error
     2. Single serial job failed - raise Error
     3. One of a set of tasks failed - print error for that dataset as traceback.
+
+    :param err: (Exception) Error raised within some part of the pipeline.
+
+    :param logger: (logging.Logger) Logging operator for any messages.
+
+    :param subset_bypass: (bool) Skip raising an error if this operation
+        is part of a sequence.
+
+    :param jobid: (str) The ID of the SLURM job if present.
+
+    :param status_fh: (object) Padocc Filehandler to update status.
     """
 
     def get_status(tb: list) -> str:
@@ -156,7 +167,7 @@ def __init__(
         proj_code: Union[str,None] = None,
         groupdir: Union[str,None] = None
     ) -> None:
-        self.message = f'Decoding resulted in overflow - received chunk data contains junk (attempted 3 times)'
+        self.message = 'Decoding resulted in overflow - received chunk data contains junk (attempted 3 times)'
         super().__init__(proj_code, groupdir)
         if verbose < 1:
             self.__class__.__module__ = 'builtins'
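
The updated docstring describes the intended call pattern for ``error_handler``: top-level loops over project codes, where ``subset_bypass`` controls whether a single failure aborts the run, and the new ``-> str`` annotation indicates the recorded status is returned. Below is a minimal, hypothetical sketch of that pattern; ``run_phase``, the project codes, and the ``'compute'`` phase label are illustrative assumptions, not part of this codebase:

.. code:: python

    import logging

    from padocc.core.errors import error_handler

    logger = logging.getLogger('group-operation')

    def run_phase(proj_code: str) -> None:
        # Hypothetical stand-in for a real per-project pipeline task.
        raise ValueError(f'demo failure for {proj_code}')

    for proj_code in ['proj-a', 'proj-b']:  # hypothetical project codes
        try:
            run_phase(proj_code)
        except Exception as err:
            # With subset_bypass=True, a single failed task is reported as a
            # traceback and the loop continues (case 3 in the docstring);
            # with the default False, the error is re-raised (cases 1 and 2).
            status = error_handler(err, logger, 'compute', subset_bypass=True)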
