From 0d033afcee8b02a6cb21932f98818a7696ccad2b Mon Sep 17 00:00:00 2001
From: EdHone <edward.hone@metoffice.gov.uk>
Date: Mon, 9 Mar 2026 15:06:26 +0000
Subject: [PATCH 1/9] Convert to fixed allocation of numpy array rather than
 list

---
 post-processing/tests/test_vernier_caliper.py |  47 ++---
 post-processing/tests/test_vernier_data.py    | 193 ++++++------------
 post-processing/tests/test_vernier_reader.py  |  41 ++--
 post-processing/vernier/vernier_data.py       |  53 +++--
 post-processing/vernier/vernier_reader.py     |  29 ++-
 5 files changed, 157 insertions(+), 206 deletions(-)

diff --git a/post-processing/tests/test_vernier_caliper.py b/post-processing/tests/test_vernier_caliper.py
index a69a9225..83ab3f1f 100644
--- a/post-processing/tests/test_vernier_caliper.py
+++ b/post-processing/tests/test_vernier_caliper.py
@@ -4,6 +4,7 @@
 #  under which the code may be used.
 # ------------------------------------------------------------------------------
 import unittest
+import numpy as np
 from pathlib import Path
 import sys
 sys.path.append(str(Path(__file__).parent.parent))
@@ -12,23 +13,23 @@
 class TestVernierCalliper(unittest.TestCase):
 
     def setUp(self):
-        self.calliper_a = VernierCalliper("test_calliper_a")
-        self.calliper_b = VernierCalliper("test_calliper_b")
+        self.calliper_a = VernierCalliper("test_calliper_a", 2)
+        self.calliper_b = VernierCalliper("test_calliper_b", 2)
 
     def test_init(self):
         self.assertEqual(self.calliper_a.name, "test_calliper_a")
-        self.assertEqual(self.calliper_a.time_percent, [])
-        self.assertEqual(self.calliper_a.cumul_time, [])
-        self.assertEqual(self.calliper_a.self_time, [])
-        self.assertEqual(self.calliper_a.total_time, [])
-        self.assertEqual(self.calliper_a.n_calls, [])
+        self.assertTrue(np.array_equal(self.calliper_a.time_percent, np.array([0.0, 0.0])))
+        self.assertTrue(np.array_equal(self.calliper_a.cumul_time, np.array([0.0, 0.0])))
+        self.assertTrue(np.array_equal(self.calliper_a.self_time, np.array([0.0, 0.0])))
+        self.assertTrue(np.array_equal(self.calliper_a.total_time, np.array([0.0, 0.0])))
+        self.assertTrue(np.array_equal(self.calliper_a.n_calls, np.array([0.0, 0.0])))
 
     def test_reduce(self):
-        self.calliper_a.time_percent = [10.0, 20.0]
-        self.calliper_a.cumul_time = [30.0, 40.0]
-        self.calliper_a.self_time = [5.0, 15.0]
-        self.calliper_a.total_time = [25.0, 35.0]
-        self.calliper_a.n_calls = [2, 2]
+        self.calliper_a.time_percent = np.array([10.0, 20.0])
+        self.calliper_a.cumul_time = np.array([30.0, 40.0])
+        self.calliper_a.self_time = np.array([5.0, 15.0])
+        self.calliper_a.total_time = np.array([25.0, 35.0])
+        self.calliper_a.n_calls = np.array([2, 2])
 
         reduced_data = self.calliper_a.reduce()
         self.assertEqual(reduced_data[0], "test_calliper_a")
@@ -40,17 +41,17 @@ def test_reduce(self):
         self.assertEqual(reduced_data[6], 15.0)
 
     def test_compare(self):
-        self.calliper_a.time_percent = [10.0, 20.0]
-        self.calliper_a.cumul_time = [30.0, 40.0]
-        self.calliper_a.self_time = [5.0, 15.0]
-        self.calliper_a.total_time = [25.0, 35.0]
-        self.calliper_a.n_calls = [2, 2]
-
-        self.calliper_b.time_percent = [12.0, 25.0]
-        self.calliper_b.cumul_time = [35.0, 46.0]
-        self.calliper_b.self_time = [6.0, 19.0]
-        self.calliper_b.total_time = [28.0, 39.0]
-        self.calliper_b.n_calls = [2, 2]
+        self.calliper_a.time_percent = np.array([10.0, 20.0])
+        self.calliper_a.cumul_time = np.array([30.0, 40.0])
+        self.calliper_a.self_time = np.array([5.0, 15.0])
+        self.calliper_a.total_time = np.array([25.0, 35.0])
+        self.calliper_a.n_calls = np.array([2, 2])
+
+        self.calliper_b.time_percent = np.array([12.0, 25.0])
+        self.calliper_b.cumul_time = np.array([35.0, 46.0])
+        self.calliper_b.self_time = np.array([6.0, 19.0])
+        self.calliper_b.total_time = np.array([28.0, 39.0])
+        self.calliper_b.n_calls = np.array([2, 2])
 
         self.assertTrue(self.calliper_a < self.calliper_b)
         self.assertFalse(self.calliper_a > self.calliper_b)
diff --git a/post-processing/tests/test_vernier_data.py b/post-processing/tests/test_vernier_data.py
index c4db62bf..64db37fd 100644
--- a/post-processing/tests/test_vernier_data.py
+++ b/post-processing/tests/test_vernier_data.py
@@ -5,6 +5,7 @@
 # ------------------------------------------------------------------------------
 from pathlib import Path
 from io import StringIO
+import numpy as np
 import tempfile
 import unittest
 import sys
@@ -19,45 +20,45 @@ def setUp(self):
         self.test_data = VernierData()
 
     def test_add_empty_calliper(self):
-        self.test_data.add_calliper("test_calliper")
+        self.test_data.add_calliper("test_calliper", 1)
         self.assertIn("test_calliper", self.test_data.data)
-        self.assertEqual(self.test_data.data["test_calliper"].time_percent, [])
-        self.assertEqual(self.test_data.data["test_calliper"].cumul_time, [])
-        self.assertEqual(self.test_data.data["test_calliper"].self_time, [])
-        self.assertEqual(self.test_data.data["test_calliper"].total_time, [])
-        self.assertEqual(self.test_data.data["test_calliper"].n_calls, [])
+        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].time_percent, np.array([0.0])))
+        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].cumul_time, np.array([0.0])))
+        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].self_time, np.array([0.0])))
+        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].total_time, np.array([0.0])))
+        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].n_calls, np.array([0.0])))
 
     def test_filter_calliper(self):
-        self.test_data.add_calliper("timestep_calliper")
-        self.test_data.add_calliper("other_calliper")
+        self.test_data.add_calliper("timestep_calliper", 1)
+        self.test_data.add_calliper("other_calliper", 1)
         filtered = self.test_data.filter(["timestep"])
         self.assertIn("timestep_calliper", filtered.data)
         self.assertNotIn("other_calliper", filtered.data)
 
     def test_filter_no_match(self):
-        self.test_data.add_calliper("timestep_calliper")
+        self.test_data.add_calliper("timestep_calliper", 1)
         with self.assertRaises(ValueError):
             self.test_data.filter(["nonexistent"])
 
     def test_filter_multiple_matches(self):
-        self.test_data.add_calliper("timestep_calliper_1")
-        self.test_data.add_calliper("timestep_calliper_2")
+        self.test_data.add_calliper("timestep_calliper_1", 1)
+        self.test_data.add_calliper("timestep_calliper_2", 1)
         filtered = self.test_data.filter(["timestep"])
         self.assertIn("timestep_calliper_1", filtered.data)
         self.assertIn("timestep_calliper_2", filtered.data)
 
     def test_filter_empty_keys(self):
-        self.test_data.add_calliper("timestep_calliper")
+        self.test_data.add_calliper("timestep_calliper", 1)
         with self.assertRaises(ValueError):
             self.test_data.filter([])
 
     def test_write_txt_output_file(self):
-        self.test_data.add_calliper("test_calliper")
-        self.test_data.data["test_calliper"].time_percent = [10.0, 20.0]
-        self.test_data.data["test_calliper"].cumul_time = [30.0, 40.0]
-        self.test_data.data["test_calliper"].self_time = [5.0, 15.0]
-        self.test_data.data["test_calliper"].total_time = [25.0, 35.0]
-        self.test_data.data["test_calliper"].n_calls = [2]
+        self.test_data.add_calliper("test_calliper", 2)
+        self.test_data.data["test_calliper"].time_percent = np.array([10.0, 20.0])
+        self.test_data.data["test_calliper"].cumul_time = np.array([30.0, 40.0])
+        self.test_data.data["test_calliper"].self_time = np.array([5.0, 15.0])
+        self.test_data.data["test_calliper"].total_time = np.array([25.0, 35.0])
+        self.test_data.data["test_calliper"].n_calls = np.array([2, 2])
 
         with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
             self.test_data.write_txt_output(Path(tmp_file.name))
@@ -66,12 +67,12 @@ def test_write_txt_output_file(self):
             self.assertEqual("| test_calliper |           30.0 |         10.0 |           35.0 |         2 |     15.0 |              15.0 |", contents[1])
 
     def test_write_txt_output_terminal(self):
-        self.test_data.add_calliper("test_calliper")
-        self.test_data.data["test_calliper"].time_percent = [50.0, 40.0]
-        self.test_data.data["test_calliper"].cumul_time = [10.0, 12.0]
-        self.test_data.data["test_calliper"].self_time = [3.0, 4.0]
-        self.test_data.data["test_calliper"].total_time = [15.0, 55.0]
-        self.test_data.data["test_calliper"].n_calls = [2]
+        self.test_data.add_calliper("test_calliper", 2)
+        self.test_data.data["test_calliper"].time_percent = np.array([50.0, 40.0])
+        self.test_data.data["test_calliper"].cumul_time = np.array([10.0, 12.0])
+        self.test_data.data["test_calliper"].self_time = np.array([3.0, 4.0])
+        self.test_data.data["test_calliper"].total_time = np.array([15.0, 55.0])
+        self.test_data.data["test_calliper"].n_calls = np.array([2, 2])
 
         write_output = StringIO()
         sys.stdout = write_output
@@ -83,46 +84,45 @@ def test_write_txt_output_terminal(self):
 
     def test_aggregate(self):
         data1 = VernierData()
-        data1.add_calliper("calliper_a")
-        data1.data["calliper_a"].time_percent = [10.0, 20.0]
-        data1.data["calliper_a"].cumul_time = [30.0, 40.0]
-        data1.data["calliper_a"].self_time = [5.0, 15.0]
-        data1.data["calliper_a"].total_time = [25.0, 35.0]
-        data1.data["calliper_a"].n_calls = [2, 2]
+        data1.add_calliper("calliper_a", 2)
+        data1.data["calliper_a"].time_percent = np.array([10.0, 20.0])
+        data1.data["calliper_a"].cumul_time = np.array([30.0, 40.0])
+        data1.data["calliper_a"].self_time = np.array([5.0, 15.0])
+        data1.data["calliper_a"].total_time = np.array([25.0, 35.0])
+        data1.data["calliper_a"].n_calls = np.array([2, 2])
 
         data2 = VernierData()
-        data2.add_calliper("calliper_a")
-        data2.data["calliper_a"].time_percent = [15.0, 25.0]
-        data2.data["calliper_a"].cumul_time = [35.0, 45.0]
-        data2.data["calliper_a"].self_time = [6.0, 16.0]
-        data2.data["calliper_a"].total_time = [28.0, 38.0]
-        data2.data["calliper_a"].n_calls = [3, 3]
-
-        aggregated = VernierData()
-        aggregated.aggregate([data1, data2])
+        data2.add_calliper("calliper_a", 2)
+        data2.data["calliper_a"].time_percent = np.array([15.0, 25.0])
+        data2.data["calliper_a"].cumul_time = np.array([35.0, 45.0])
+        data2.data["calliper_a"].self_time = np.array([6.0, 16.0])
+        data2.data["calliper_a"].total_time = np.array([28.0, 38.0])
+        data2.data["calliper_a"].n_calls = np.array([3, 3])
+
+        aggregated = aggregate([data1, data2])
         self.assertIn("calliper_a", aggregated.data)
-        self.assertEqual(aggregated.data["calliper_a"].time_percent, [10.0, 20.0, 15.0, 25.0])
-        self.assertEqual(aggregated.data["calliper_a"].cumul_time, [30.0, 40.0, 35.0, 45.0])
-        self.assertEqual(aggregated.data["calliper_a"].self_time, [5.0, 15.0, 6.0, 16.0])
-        self.assertEqual(aggregated.data["calliper_a"].total_time, [25.0, 35.0, 28.0, 38.0])
-        self.assertEqual(aggregated.data["calliper_a"].n_calls, [2, 2, 3, 3])
+        self.assertTrue(np.array_equal(aggregated.data["calliper_a"].time_percent, np.array([10.0, 20.0, 15.0, 25.0])))
+        self.assertTrue(np.array_equal(aggregated.data["calliper_a"].cumul_time, np.array([30.0, 40.0, 35.0, 45.0])))
+        self.assertTrue(np.array_equal(aggregated.data["calliper_a"].self_time, np.array([5.0, 15.0, 6.0, 16.0])))
+        self.assertTrue(np.array_equal(aggregated.data["calliper_a"].total_time, np.array([25.0, 35.0, 28.0, 38.0])))
+        self.assertTrue(np.array_equal(aggregated.data["calliper_a"].n_calls, np.array([2, 2, 3, 3])))
 
     def test_aggregate_inconsistent(self):
         data1 = VernierData()
-        data1.add_calliper("calliper_a")
-        data1.data["calliper_a"].time_percent = [10.0, 20.0]
-        data1.data["calliper_a"].cumul_time = [30.0, 40.0]
-        data1.data["calliper_a"].self_time = [5.0, 15.0]
-        data1.data["calliper_a"].total_time = [25.0, 35.0]
-        data1.data["calliper_a"].n_calls = [2, 2]
+        data1.add_calliper("calliper_a", 2)
+        data1.data["calliper_a"].time_percent = np.array([10.0, 20.0])
+        data1.data["calliper_a"].cumul_time = np.array([30.0, 40.0])
+        data1.data["calliper_a"].self_time = np.array([5.0, 15.0])
+        data1.data["calliper_a"].total_time = np.array([25.0, 35.0])
+        data1.data["calliper_a"].n_calls = np.array([2, 2])
 
         data2 = VernierData()
-        data2.add_calliper("calliper_b")
-        data2.data["calliper_b"].time_percent = [15.0, 25.0]
-        data2.data["calliper_b"].cumul_time = [35.0, 45.0]
-        data2.data["calliper_b"].self_time = [6.0, 16.0]
-        data2.data["calliper_b"].total_time = [28.0, 38.0]
-        data2.data["calliper_b"].n_calls = [3, 3]
+        data2.add_calliper("calliper_b", 2)
+        data2.data["calliper_b"].time_percent = np.array([15.0, 25.0])
+        data2.data["calliper_b"].cumul_time = np.array([35.0, 45.0])
+        data2.data["calliper_b"].self_time = np.array([6.0, 16.0])
+        data2.data["calliper_b"].total_time = np.array([28.0, 38.0])
+        data2.data["calliper_b"].n_calls = np.array([3, 3])
 
         with self.assertRaises(ValueError):
             aggregated = VernierData()
@@ -130,87 +130,10 @@ def test_aggregate_inconsistent(self):
 
     def test_aggregate_inconsistent_ok(self):
         data1 = VernierData()
-        data1.add_calliper("calliper_a")
+        data1.add_calliper("calliper_a", 2)
 
         data2 = VernierData()
-        data2.add_calliper("calliper_b")
-
-        aggregated = VernierData()
-        aggregated.aggregate([data1, data2], internal_consistency=False)
-        self.assertIn("calliper_a", aggregated.data)
-        self.assertIn("calliper_b", aggregated.data)
-
-    def test_get(self):
-        data1 = VernierData()
-        data1.add_calliper("calliper_a")
-        data1.data["calliper_a"].time_percent = [10.0, 20.0]
-        data1.data["calliper_a"].cumul_time = [30.0, 40.0]
-        data1.data["calliper_a"].self_time = [5.0, 15.0]
-        data1.data["calliper_a"].total_time = [25.0, 35.0]
-        data1.data["calliper_a"].n_calls = [2, 2]
-        self.assertEqual(len(data1.get("calliper_a")), 2)
-
-
-class TestVernierCollation(unittest.TestCase):
-    """
-    Tests for the VernierData Collation class.
-    """
-    def _add_data(self):
-        self.collation = VernierDataCollation()
-        data1 = VernierData()
-        data1.add_calliper("calliper_a")
-        data1.data["calliper_a"].time_percent = [10.0, 20.0]
-        data1.data["calliper_a"].cumul_time = [30.0, 40.0]
-        data1.data["calliper_a"].self_time = [5.0, 15.0]
-        data1.data["calliper_a"].total_time = [25.0, 35.0]
-        data1.data["calliper_a"].n_calls = [2, 2]
-
-        data2 = VernierData()
-        data2.add_calliper("calliper_a")
-        data2.data["calliper_a"].time_percent = [15.0, 25.0]
-        data2.data["calliper_a"].cumul_time = [35.0, 45.0]
-        data2.data["calliper_a"].self_time = [6.0, 16.0]
-        data2.data["calliper_a"].total_time = [28.0, 38.0]
-        data2.data["calliper_a"].n_calls = [3, 3]
-
-        self.collation.add_data('test1', data1)
-        self.collation.add_data('test2', data2)
-        
-    def test_add_data(self):
-        self._add_data()
-        self.assertEqual(len(self.collation), 2)
-
-    def test_remove_data(self):
-        self._add_data()
-        self.collation.remove_data('test1')
-        self.assertEqual(len(self.collation), 1)
-
-    def test_get(self):
-        self._add_data()
-        calliper_a = self.collation.get("calliper_a")
-        self.assertEqual(len(calliper_a), 4)
-
-    def test_internal_consistency(self):
-        self._add_data()
-        data_inc = VernierData()
-        data_inc.add_calliper("calliper_a")
-        data_inc.data["calliper_a"].time_percent = [10.0, 20.0]
-        data_inc.data["calliper_a"].cumul_time = [30.0, 40.0]
-        data_inc.data["calliper_a"].self_time = [5.0, 15.0]
-        data_inc.data["calliper_a"].total_time = [25.0, 35.0]
-        data_inc.data["calliper_a"].n_calls = [2, 2]
-
-        data_inc.add_calliper("calliper_b")
-        data_inc.data["calliper_b"].time_percent = [15.0, 25.0]
-        data_inc.data["calliper_b"].cumul_time = [35.0, 45.0]
-        data_inc.data["calliper_b"].self_time = [6.0, 16.0]
-        data_inc.data["calliper_b"].total_time = [28.0, 38.0]
-        data_inc.data["calliper_b"].n_calls = [3, 3]
-
-        with self.assertRaises(ValueError) as test_exception:
-            self.collation.add_data('test3', data_inc)
-        self.assertEqual(str(test_exception.exception),
-                         "inconsistent callipers in new_vernier_data")
+        data2.add_calliper("calliper_b", 2)
 
 
 if __name__ == '__main__':
diff --git a/post-processing/tests/test_vernier_reader.py b/post-processing/tests/test_vernier_reader.py
index 9723e5cc..e1d465ca 100644
--- a/post-processing/tests/test_vernier_reader.py
+++ b/post-processing/tests/test_vernier_reader.py
@@ -5,6 +5,7 @@
 # ------------------------------------------------------------------------------
 from pathlib import Path
 import unittest
+import numpy as np
 import sys
 sys.path.append(str(Path(__file__).parent.parent))
 from vernier.vernier_reader import VernierReader
@@ -29,32 +30,32 @@ def test_load_from_file(self):
         loaded_data = test_reader.load()
         self.assertIn("__test_app__", loaded_data.data)
         self.assertIn("some_process", loaded_data.data)
-        self.assertEqual(loaded_data.data["__test_app__"].n_calls, [1, 1])
-        self.assertEqual(loaded_data.data["some_process"].n_calls, [2, 2])
-        self.assertEqual(loaded_data.data["__test_app__"].time_percent, [44.130, 37.835])
-        self.assertEqual(loaded_data.data["some_process"].time_percent, [34.563, 43.991])
-        self.assertEqual(loaded_data.data["__test_app__"].cumul_time, [2.583, 5.596])
-        self.assertEqual(loaded_data.data["some_process"].cumul_time, [4.606, 3.009])
-        self.assertEqual(loaded_data.data["__test_app__"].self_time, [2.583, 2.588])
-        self.assertEqual(loaded_data.data["some_process"].self_time, [2.023, 3.009])
-        self.assertEqual(loaded_data.data["__test_app__"].total_time, [5.854, 6.839])
-        self.assertEqual(loaded_data.data["some_process"].total_time,[2.077, 3.069])
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].n_calls, np.array([1, 1])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].n_calls, np.array([2, 2])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].time_percent, np.array([44.130, 37.835])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].time_percent, np.array([34.563, 43.991])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].cumul_time, np.array([2.583, 5.596])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].cumul_time, np.array([4.606, 3.009])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].self_time, np.array([2.583, 2.588])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].self_time, np.array([2.023, 3.009])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].total_time, np.array([5.854, 6.839])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].total_time, np.array([2.077, 3.069])))
 
     def test_load_from_directory(self):
         test_reader = VernierReader(self.test_data_dir / "vernier-output")
         loaded_data = test_reader.load()
         self.assertIn("__test_app__", loaded_data.data)
         self.assertIn("some_process", loaded_data.data)
-        self.assertCountEqual(loaded_data.data["__test_app__"].n_calls, [1, 1])
-        self.assertCountEqual(loaded_data.data["some_process"].n_calls, [2, 2])
-        self.assertCountEqual(loaded_data.data["__test_app__"].time_percent, [44.130, 37.835])
-        self.assertCountEqual(loaded_data.data["some_process"].time_percent, [34.563, 43.991])
-        self.assertCountEqual(loaded_data.data["__test_app__"].cumul_time, [2.583, 5.596])
-        self.assertCountEqual(loaded_data.data["some_process"].cumul_time, [4.606, 3.009])
-        self.assertCountEqual(loaded_data.data["__test_app__"].self_time, [2.583, 2.588])
-        self.assertCountEqual(loaded_data.data["some_process"].self_time, [2.023, 3.009])
-        self.assertCountEqual(loaded_data.data["__test_app__"].total_time, [5.854, 6.839])
-        self.assertCountEqual(loaded_data.data["some_process"].total_time,[2.077, 3.069])
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].n_calls, np.array([1, 1])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].n_calls, np.array([2, 2])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].time_percent, np.array([44.130, 37.835])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].time_percent, np.array([34.563, 43.991])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].cumul_time, np.array([2.583, 5.596])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].cumul_time, np.array([4.606, 3.009])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].self_time, np.array([2.583, 2.588])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].self_time, np.array([2.023, 3.009])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].total_time, np.array([5.854, 6.839])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].total_time, np.array([2.077, 3.069])))
 
 
 if __name__ == '__main__':
diff --git a/post-processing/vernier/vernier_data.py b/post-processing/vernier/vernier_data.py
index 116c10a8..32f443ca 100644
--- a/post-processing/vernier/vernier_data.py
+++ b/post-processing/vernier/vernier_data.py
@@ -9,31 +9,30 @@
 from pathlib import Path
 from typing import Optional
 
-@dataclass(order=True)
 class VernierCalliper():
     """Class to hold data for a single Vernier calliper, including arrays for each metric."""
 
-    total_time: list[float]
-    time_percent: list[float]
-    self_time: list[float]
-    cumul_time: list[float]
-    n_calls: list[int]
+    self_time: np.ndarray
+    total_time: np.ndarray
+    time_percent: np.ndarray
+    cumul_time: np.ndarray
+    n_calls: np.ndarray
     name: str
 
-    def __init__(self, name: str):
+    def __init__(self, name: str, n_ranks: int):
 
         self.name = name
-        self.time_percent = []
-        self.cumul_time = []
-        self.self_time = []
-        self.total_time = []
-        self.n_calls = []
+        self.time_percent = np.zeros(n_ranks)
+        self.cumul_time = np.zeros(n_ranks)
+        self.self_time = np.zeros(n_ranks)
+        self.total_time = np.zeros(n_ranks)
+        self.n_calls = np.zeros(n_ranks)
 
         return
 
     def __len__(self):
         """
-        Return None if caliper elements differ in length,
+        Return None if calliper elements differ in length,
         otherwise return element lengths.
         """
         result = None
@@ -47,12 +46,12 @@ def reduce(self) -> list:
 
         return [
             self.name.replace('@0', ''), # calliper name
-            round(np.mean(self.total_time), 5), # mean total time across calls
-            round(np.mean(self.self_time), 5), # mean self time across calls
-            round(np.mean(self.cumul_time), 5), # mean cumulative time across calls
-            self.n_calls[0], # number of calls (should be the same for all entries, so just take the first)
-            round(np.mean(self.time_percent), 5), # mean percentage of time across calls
-            round(np.mean(self.total_time) / self.n_calls[0], 5) # mean time per call
+            round(self.total_time.mean(), 5), # mean total time across calls
+            round(self.self_time.mean(), 5), # mean self time across calls
+            round(self.cumul_time.mean(), 5), # mean cumulative time across calls
+            int(self.n_calls[0])    , # number of calls (should be the same for all entries, so just take the first)
+            round(self.time_percent.mean(), 5), # mean percentage of time across calls
+            round(self.total_time.mean() / self.n_calls[0], 5) # mean time per call
         ]
 
     @classmethod
@@ -60,6 +59,18 @@ def labels(self):
         return ["Routine", "Total time (s)", "Self (s)", "Cumul time (s)",
                 "No. calls", "% time", "Time per call (s)"]
 
+    def __lt__(self, other):
+        """Comparison method for sorting callipers by self time."""
+        return self.self_time.sum() < other.self_time.sum()
+
+    def __eq__(self, other):
+        """Equality method for comparing callipers by self time."""
+        return self.self_time.sum() == other.self_time.sum()
+
+    def __gt__(self, other):
+        """Comparison method for sorting callipers by self time."""
+        return self.self_time.sum() > other.self_time.sum()
+
 
 class VernierData():
     """
@@ -75,11 +86,11 @@ def __init__(self):
         return
 
 
-    def add_calliper(self, calliper_key: str):
+    def add_calliper(self, calliper_key: str, n_ranks: int):
         """Adds a new calliper to the data structure, with empty arrays for each metric."""
 
         # Create empty data arrays
-        self.data[calliper_key] = VernierCalliper(calliper_key)
+        self.data[calliper_key] = VernierCalliper(calliper_key, n_ranks)
 
     def filter(self, calliper_keys: list[str]):
         """Filters the Vernier data to include only callipers matching the provided keys.
diff --git a/post-processing/vernier/vernier_reader.py b/post-processing/vernier/vernier_reader.py
index ac015e7f..8f2c38b6 100644
--- a/post-processing/vernier/vernier_reader.py
+++ b/post-processing/vernier/vernier_reader.py
@@ -24,23 +24,38 @@ def _load_from_file(self) -> VernierData:
         """
 
         loaded = VernierData()
+        n_ranks_total = None
+        rank = 0
 
         # Populate data
-        contents = self.path.read_text().splitlines()
+        contents = self.path.read_text()
+        n_ranks_in_file = contents.count("Task") # Number of ranks worth of data in file only countable this way
+        contents = contents.splitlines()
         for line in contents:
             sline = line.split()
             if len(sline) > 0: # Line contains data
+                if "Task" in sline:
+                    if n_ranks_total is None:
+                        n_ranks_total = int(sline[3])
+                        if not n_ranks_in_file in [1, n_ranks_total]:
+                            raise ValueError(f"Input data not consistent with either collated or multi-file vernier output.")
+                    rank = int(sline[1]) # Extract rank number from the data line
                 if sline[0].isdigit(): # Calliper lines start with a digit
 
+                    if n_ranks_in_file == 1:
+                        data_index = 0
+                    else:
+                        data_index = rank - 1 # Data index for this rank (0-based)
+
                     calliper = sline[-1]
                     if not calliper in loaded.data:
-                        loaded.add_calliper(calliper)
+                        loaded.add_calliper(calliper, n_ranks_in_file)
 
-                    loaded.data[calliper].time_percent.append(float(sline[1]))
-                    loaded.data[calliper].cumul_time.append(float(sline[2]))
-                    loaded.data[calliper].self_time.append(float(sline[3]))
-                    loaded.data[calliper].total_time.append(float(sline[4]))
-                    loaded.data[calliper].n_calls.append(int(sline[5]))
+                    loaded.data[calliper].time_percent[data_index] = float(sline[1])
+                    loaded.data[calliper].cumul_time[data_index] = float(sline[2])
+                    loaded.data[calliper].self_time[data_index] = float(sline[3])
+                    loaded.data[calliper].total_time[data_index] = float(sline[4])
+                    loaded.data[calliper].n_calls[data_index] = int(sline[5])
 
         if not loaded.data:
             raise ValueError(f"No calliper data found in file '{self.path}'.")

From 9167f57ad32869080829171e856c86fd830de12b Mon Sep 17 00:00:00 2001
From: EdHone <edward.hone@metoffice.gov.uk>
Date: Fri, 13 Mar 2026 15:13:14 +0000
Subject: [PATCH 2/9] Switch to numpy array implementation

---
 post-processing/tests/test_vernier_data.py | 53 ----------------------
 post-processing/vernier/vernier_data.py    | 32 -------------
 post-processing/vernier/vernier_reader.py  | 32 +++++++++++--
 3 files changed, 27 insertions(+), 90 deletions(-)

diff --git a/post-processing/tests/test_vernier_data.py b/post-processing/tests/test_vernier_data.py
index 64db37fd..2ec435df 100644
--- a/post-processing/tests/test_vernier_data.py
+++ b/post-processing/tests/test_vernier_data.py
@@ -82,59 +82,6 @@ def test_write_txt_output_terminal(self):
         self.assertEqual("|       Routine | Total time (s) |     Self (s) | Cumul time (s) | No. calls |   % time | Time per call (s) |", write_output.getvalue().splitlines()[0])
         self.assertEqual("| test_calliper |           35.0 |          3.5 |           11.0 |         2 |     45.0 |              17.5 |", write_output.getvalue().splitlines()[1])
 
-    def test_aggregate(self):
-        data1 = VernierData()
-        data1.add_calliper("calliper_a", 2)
-        data1.data["calliper_a"].time_percent = np.array([10.0, 20.0])
-        data1.data["calliper_a"].cumul_time = np.array([30.0, 40.0])
-        data1.data["calliper_a"].self_time = np.array([5.0, 15.0])
-        data1.data["calliper_a"].total_time = np.array([25.0, 35.0])
-        data1.data["calliper_a"].n_calls = np.array([2, 2])
-
-        data2 = VernierData()
-        data2.add_calliper("calliper_a", 2)
-        data2.data["calliper_a"].time_percent = np.array([15.0, 25.0])
-        data2.data["calliper_a"].cumul_time = np.array([35.0, 45.0])
-        data2.data["calliper_a"].self_time = np.array([6.0, 16.0])
-        data2.data["calliper_a"].total_time = np.array([28.0, 38.0])
-        data2.data["calliper_a"].n_calls = np.array([3, 3])
-
-        aggregated = aggregate([data1, data2])
-        self.assertIn("calliper_a", aggregated.data)
-        self.assertTrue(np.array_equal(aggregated.data["calliper_a"].time_percent, np.array([10.0, 20.0, 15.0, 25.0])))
-        self.assertTrue(np.array_equal(aggregated.data["calliper_a"].cumul_time, np.array([30.0, 40.0, 35.0, 45.0])))
-        self.assertTrue(np.array_equal(aggregated.data["calliper_a"].self_time, np.array([5.0, 15.0, 6.0, 16.0])))
-        self.assertTrue(np.array_equal(aggregated.data["calliper_a"].total_time, np.array([25.0, 35.0, 28.0, 38.0])))
-        self.assertTrue(np.array_equal(aggregated.data["calliper_a"].n_calls, np.array([2, 2, 3, 3])))
-
-    def test_aggregate_inconsistent(self):
-        data1 = VernierData()
-        data1.add_calliper("calliper_a", 2)
-        data1.data["calliper_a"].time_percent = np.array([10.0, 20.0])
-        data1.data["calliper_a"].cumul_time = np.array([30.0, 40.0])
-        data1.data["calliper_a"].self_time = np.array([5.0, 15.0])
-        data1.data["calliper_a"].total_time = np.array([25.0, 35.0])
-        data1.data["calliper_a"].n_calls = np.array([2, 2])
-
-        data2 = VernierData()
-        data2.add_calliper("calliper_b", 2)
-        data2.data["calliper_b"].time_percent = np.array([15.0, 25.0])
-        data2.data["calliper_b"].cumul_time = np.array([35.0, 45.0])
-        data2.data["calliper_b"].self_time = np.array([6.0, 16.0])
-        data2.data["calliper_b"].total_time = np.array([28.0, 38.0])
-        data2.data["calliper_b"].n_calls = np.array([3, 3])
-
-        with self.assertRaises(ValueError):
-            aggregated = VernierData()
-            aggregated.aggregate([data1, data2])
-
-    def test_aggregate_inconsistent_ok(self):
-        data1 = VernierData()
-        data1.add_calliper("calliper_a", 2)
-
-        data2 = VernierData()
-        data2.add_calliper("calliper_b", 2)
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/post-processing/vernier/vernier_data.py b/post-processing/vernier/vernier_data.py
index 32f443ca..466e456d 100644
--- a/post-processing/vernier/vernier_data.py
+++ b/post-processing/vernier/vernier_data.py
@@ -144,38 +144,6 @@ def get(self, calliper_key):
         return self.data.get(calliper_key, None)
 
 
-    def aggregate(self, vernier_data_list=None, internal_consistency=True):
-        """
-        Aggregates a list of VernierData objects into a single VernierData
-        object, by concatenating the data for each calliper across the input
-        objects.
-        """
-
-        if vernier_data_list is None:
-            vernier_data_list = []
-        if internal_consistency:
-            # Check that all input VernierData objects have the same
-            # set of callipers
-            calliper_sets = [set(vernier_data.data.keys()) for vernier_data in
-                             vernier_data_list]
-            if not all(calliper_set == calliper_sets[0] for
-                       calliper_set in calliper_sets):
-                raise ValueError("Input VernierData objects do not have the "
-                                 "same set of callipers, but "
-                                 "internal_consistency is set to True.")
-
-        for vernier_data in vernier_data_list:
-            for calliper in vernier_data.data.keys():
-                if not calliper in self.data:
-                    self.add_calliper(calliper)
-
-                self.data[calliper].time_percent.extend(vernier_data.data[calliper].time_percent)
-                self.data[calliper].cumul_time.extend(vernier_data.data[calliper].cumul_time)
-                self.data[calliper].self_time.extend(vernier_data.data[calliper].self_time)
-                self.data[calliper].total_time.extend(vernier_data.data[calliper].total_time)
-                self.data[calliper].n_calls.extend(vernier_data.data[calliper].n_calls)
-
-
 class VernierDataCollation():
     """
     Class to hold an collation of VernierData instances.
diff --git a/post-processing/vernier/vernier_reader.py b/post-processing/vernier/vernier_reader.py
index 8f2c38b6..3d6b490d 100644
--- a/post-processing/vernier/vernier_reader.py
+++ b/post-processing/vernier/vernier_reader.py
@@ -28,10 +28,10 @@ def _load_from_file(self) -> VernierData:
         rank = 0
 
         # Populate data
-        contents = self.path.read_text()
-        n_ranks_in_file = contents.count("Task") # Number of ranks worth of data in file only countable this way
-        contents = contents.splitlines()
-        for line in contents:
+        file_contents = self.path.read_text()
+        n_ranks_in_file = file_contents.count("Task") # Number of ranks worth of data in file only countable this way
+        file_data = file_contents.splitlines()
+        for line in file_data:
             sline = line.split()
             if len(sline) > 0: # Line contains data
                 if "Task" in sline:
@@ -72,7 +72,29 @@ def _load_from_directory(self) -> VernierData:
             vernier_datasets = list(pool.map(lambda f: VernierReader(self.path / f)._load_from_file(), vernier_files))
 
         result = VernierData()
-        result.aggregate(vernier_datasets)
+
+        # Check that all input VernierData objects have the same
+        # set of callipers
+        calliper_sets = [set(vernier_data.data.keys()) for vernier_data in
+                            vernier_datasets]
+        if not all(calliper_set == calliper_sets[0] for
+                    calliper_set in calliper_sets):
+            raise ValueError("Input VernierData objects do not have the "
+                                "same set of callipers, but "
+                                "internal_consistency is set to True.")
+
+        for i, vernier_data in enumerate(vernier_datasets):
+            for calliper in vernier_data.data.keys():
+                if not calliper in result.data:
+                    result.add_calliper(calliper, len(vernier_datasets))
+
+                result.data[calliper].time_percent[i] = vernier_data.data[calliper].time_percent
+                result.data[calliper].cumul_time[i] = vernier_data.data[calliper].cumul_time
+                result.data[calliper].self_time[i] = vernier_data.data[calliper].self_time
+                result.data[calliper].total_time[i] = vernier_data.data[calliper].total_time
+                result.data[calliper].n_calls[i] = vernier_data.data[calliper].n_calls
+
+
         return result
 
 

From 6ce7bd6442823b49b5e63f445124f4e0244fecc0 Mon Sep 17 00:00:00 2001
From: EdHone <edward.hone@metoffice.gov.uk>
Date: Fri, 13 Mar 2026 16:38:02 +0000
Subject: [PATCH 3/9] Code implementation for handling multithreaded data

---
 post-processing/vernier/vernier_data.py   | 20 +++++------
 post-processing/vernier/vernier_reader.py | 42 +++++++++++++----------
 2 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/post-processing/vernier/vernier_data.py b/post-processing/vernier/vernier_data.py
index 466e456d..82e24a61 100644
--- a/post-processing/vernier/vernier_data.py
+++ b/post-processing/vernier/vernier_data.py
@@ -19,14 +19,14 @@ class VernierCalliper():
     n_calls: np.ndarray
     name: str
 
-    def __init__(self, name: str, n_ranks: int):
+    def __init__(self, name: str, n_ranks: int, n_threads: int):
 
         self.name = name
-        self.time_percent = np.zeros(n_ranks)
-        self.cumul_time = np.zeros(n_ranks)
-        self.self_time = np.zeros(n_ranks)
-        self.total_time = np.zeros(n_ranks)
-        self.n_calls = np.zeros(n_ranks)
+        self.time_percent = np.zeros((n_ranks, n_threads))
+        self.cumul_time = np.zeros((n_ranks, n_threads))
+        self.self_time = np.zeros((n_ranks, n_threads))
+        self.total_time = np.zeros((n_ranks, n_threads))
+        self.n_calls = np.zeros((n_ranks, n_threads), dtype=int)
 
         return
 
@@ -49,9 +49,9 @@ def reduce(self) -> list:
             round(self.total_time.mean(), 5), # mean total time across calls
             round(self.self_time.mean(), 5), # mean self time across calls
             round(self.cumul_time.mean(), 5), # mean cumulative time across calls
-            int(self.n_calls[0])    , # number of calls (should be the same for all entries, so just take the first)
+            int(self.n_calls.max())    , # number of calls
             round(self.time_percent.mean(), 5), # mean percentage of time across calls
-            round(self.total_time.mean() / self.n_calls[0], 5) # mean time per call
+            round(np.mean(self.total_time / self.n_calls), 5) # mean time per call
         ]
 
     @classmethod
@@ -86,11 +86,11 @@ def __init__(self):
         return
 
 
-    def add_calliper(self, calliper_key: str, n_ranks: int):
+    def add_calliper(self, calliper_key: str, n_ranks: int, n_threads: int):
         """Adds a new calliper to the data structure, with empty arrays for each metric."""
 
         # Create empty data arrays
-        self.data[calliper_key] = VernierCalliper(calliper_key, n_ranks)
+        self.data[calliper_key] = VernierCalliper(calliper_key, n_ranks, n_threads)
 
     def filter(self, calliper_keys: list[str]):
         """Filters the Vernier data to include only callipers matching the provided keys.
diff --git a/post-processing/vernier/vernier_reader.py b/post-processing/vernier/vernier_reader.py
index 3d6b490d..0c05f302 100644
--- a/post-processing/vernier/vernier_reader.py
+++ b/post-processing/vernier/vernier_reader.py
@@ -30,6 +30,8 @@ def _load_from_file(self) -> VernierData:
         # Populate data
         file_contents = self.path.read_text()
         n_ranks_in_file = file_contents.count("Task") # Number of ranks worth of data in file only countable this way
+        calliper_ids = [line.split()[-1] for line in file_contents.splitlines() if line.split() and line.split()[0].isdigit()]
+
         file_data = file_contents.splitlines()
         for line in file_data:
             sline = line.split()
@@ -39,23 +41,27 @@ def _load_from_file(self) -> VernierData:
                         n_ranks_total = int(sline[3])
                         if not n_ranks_in_file in [1, n_ranks_total]:
                             raise ValueError(f"Input data not consistent with either collated or multi-file vernier output.")
-                    rank = int(sline[1]) # Extract rank number from the data line
+                    rank = int(sline[-1]) # Extract rank number from the data line
                 if sline[0].isdigit(): # Calliper lines start with a digit
 
                     if n_ranks_in_file == 1:
-                        data_index = 0
+                        rank_index = 0
                     else:
-                        data_index = rank - 1 # Data index for this rank (0-based)
+                        rank_index = rank
+
+                    calliper, thread = sline[-1].split('@')
+                    thread_id = int(thread)
+
+                    n_threads = int(len([cal_id for cal_id in calliper_ids if f"{calliper}@" in cal_id]) / n_ranks_in_file)
 
-                    calliper = sline[-1]
                     if not calliper in loaded.data:
-                        loaded.add_calliper(calliper, n_ranks_in_file)
+                        loaded.add_calliper(calliper, n_ranks_in_file, n_threads)
 
-                    loaded.data[calliper].time_percent[data_index] = float(sline[1])
-                    loaded.data[calliper].cumul_time[data_index] = float(sline[2])
-                    loaded.data[calliper].self_time[data_index] = float(sline[3])
-                    loaded.data[calliper].total_time[data_index] = float(sline[4])
-                    loaded.data[calliper].n_calls[data_index] = int(sline[5])
+                    loaded.data[calliper].time_percent[rank_index, thread_id] = float(sline[1])
+                    loaded.data[calliper].cumul_time[rank_index, thread_id] = float(sline[2])
+                    loaded.data[calliper].self_time[rank_index, thread_id] = float(sline[3])
+                    loaded.data[calliper].total_time[rank_index, thread_id] = float(sline[4])
+                    loaded.data[calliper].n_calls[rank_index, thread_id] = int(sline[5])
 
         if not loaded.data:
             raise ValueError(f"No calliper data found in file '{self.path}'.")
@@ -86,14 +92,14 @@ def _load_from_directory(self) -> VernierData:
         for i, vernier_data in enumerate(vernier_datasets):
             for calliper in vernier_data.data.keys():
                 if not calliper in result.data:
-                    result.add_calliper(calliper, len(vernier_datasets))
-
-                result.data[calliper].time_percent[i] = vernier_data.data[calliper].time_percent
-                result.data[calliper].cumul_time[i] = vernier_data.data[calliper].cumul_time
-                result.data[calliper].self_time[i] = vernier_data.data[calliper].self_time
-                result.data[calliper].total_time[i] = vernier_data.data[calliper].total_time
-                result.data[calliper].n_calls[i] = vernier_data.data[calliper].n_calls
-
+                    _, n_threads = vernier_data.data[calliper].self_time.shape
+                    result.add_calliper(calliper, len(vernier_datasets), n_threads)
+
+                result.data[calliper].time_percent[i] = vernier_data.data[calliper].time_percent[0]
+                result.data[calliper].cumul_time[i] = vernier_data.data[calliper].cumul_time[0]
+                result.data[calliper].self_time[i] = vernier_data.data[calliper].self_time[0]
+                result.data[calliper].total_time[i] = vernier_data.data[calliper].total_time[0]
+                result.data[calliper].n_calls[i] = vernier_data.data[calliper].n_calls[0]
 
         return result
 

From b5b8b902b169462e37b1d5958710be0271855905 Mon Sep 17 00:00:00 2001
From: EdHone <edward.hone@metoffice.gov.uk>
Date: Mon, 16 Mar 2026 10:41:43 +0000
Subject: [PATCH 4/9] Tests passing for numpy implementation

---
 .../tests/data/vernier-output-test            |  8 +--
 .../vernier-output-threaded/vernier-output-0  | 21 +++++++
 .../vernier-output-threaded/vernier-output-1  | 21 +++++++
 .../data/vernier-output/vernier-output-test-0 |  4 +-
 .../data/vernier-output/vernier-output-test-1 |  4 +-
 post-processing/tests/test_vernier_caliper.py | 14 ++---
 post-processing/tests/test_vernier_data.py    | 28 ++++-----
 post-processing/tests/test_vernier_reader.py  | 62 +++++++++++++------
 8 files changed, 113 insertions(+), 49 deletions(-)
 create mode 100644 post-processing/tests/data/vernier-output-threaded/vernier-output-0
 create mode 100644 post-processing/tests/data/vernier-output-threaded/vernier-output-1

diff --git a/post-processing/tests/data/vernier-output-test b/post-processing/tests/data/vernier-output-test
index 29bc4c68..de812bf2 100644
--- a/post-processing/tests/data/vernier-output-test
+++ b/post-processing/tests/data/vernier-output-test
@@ -6,8 +6,8 @@ Profiling on 1 thread(s).
                                                                              (Size; Size/sec; Size/call; MinSize; MaxSize)
         (self)        (sec)        (sec)        (sec)                    ms/call   ms/call
 
-    1   44.130        2.583        2.583        5.854              1    2583.167    5853.527    __test_app__
-    2   34.563        4.606        2.023        2.077              2    1011.582    1038.697    some_process
+    1   44.130        2.583        2.583        5.854              1    2583.167    5853.527    __test_app__@0
+    2   34.563        4.606        2.023        2.077              2    1011.582    1038.697    some_process@0
 
 Task 2 of 2 : MPI rank ID 1
 Profiling on 1 thread(s).
@@ -16,5 +16,5 @@ Profiling on 1 thread(s).
                                                                              (Size; Size/sec; Size/call; MinSize; MaxSize)
         (self)        (sec)        (sec)        (sec)                    ms/call   ms/call
 
-    1   43.991        3.009        3.009        3.069              2    1504.362    1534.740    some_process
-    2   37.835        5.596        2.588        6.839              1    2587.638    6839.336    __test_app__
+    1   43.991        3.009        3.009        3.069              2    1504.362    1534.740    some_process@0
+    2   37.835        5.596        2.588        6.839              1    2587.638    6839.336    __test_app__@0
diff --git a/post-processing/tests/data/vernier-output-threaded/vernier-output-0 b/post-processing/tests/data/vernier-output-threaded/vernier-output-0
new file mode 100644
index 00000000..888aabba
--- /dev/null
+++ b/post-processing/tests/data/vernier-output-threaded/vernier-output-0
@@ -0,0 +1,21 @@
+
+Task 1 of 2 : MPI rank ID 0
+Profiling on 4 thread(s).
+
+    #  % Time         Cumul         Self        Total     # of calls        Self   Total    Routine@
+                                                                             (Size; Size/sec; Size/call; MinSize; MaxSize)
+        (self)        (sec)        (sec)        (sec)                    ms/call   ms/call
+
+    1   40.000        2.000        2.000        3.000              2    1000.075    1500.115    MAIN_SUB@0
+    2   40.000        4.000        2.000        5.000              1    2000.140    5000.373    FULL@0
+    3   20.000        5.000        1.000        1.000              1    1000.073    1000.073    MAIN_SUB2@3
+    4   20.000        6.000        1.000        2.000              1    1000.072    2000.152    MAIN_SUB@3
+    5   20.000        7.001        1.000        1.000              1    1000.072    1000.072    MAIN_SUB2@0
+    6   20.000        8.001        1.000        1.000              1    1000.071    1000.071    MAIN_SUB2@2
+    7   20.000        9.001        1.000        1.000              1    1000.069    1000.069    MAIN_SUB2@1
+    8   20.000       10.001        1.000        2.000              1    1000.068    2000.151    MAIN_SUB@2
+    9   20.000       11.001        1.000        2.000              1    1000.067    2000.150    MAIN_SUB@1
+    10   0.000       11.001        0.000        0.000              2       0.008       0.008    __vernier__@2
+    11   0.000       11.001        0.000        0.000              4       0.004       0.004    __vernier__@0
+    12   0.000       11.001        0.000        0.000              2       0.008       0.008    __vernier__@1
+    13   0.000       11.001        0.000        0.000              2       0.007       0.007    __vernier__@3
diff --git a/post-processing/tests/data/vernier-output-threaded/vernier-output-1 b/post-processing/tests/data/vernier-output-threaded/vernier-output-1
new file mode 100644
index 00000000..763a4f58
--- /dev/null
+++ b/post-processing/tests/data/vernier-output-threaded/vernier-output-1
@@ -0,0 +1,21 @@
+
+Task 2 of 2 : MPI rank ID 1
+Profiling on 4 thread(s).
+
+    #  % Time         Cumul         Self        Total     # of calls        Self   Total    Routine@
+                                                                             (Size; Size/sec; Size/call; MinSize; MaxSize)
+        (self)        (sec)        (sec)        (sec)                    ms/call   ms/call
+
+    1   40.031        2.004        2.004        5.006              1    2003.948    5005.940    FULL@0
+    2   39.956        4.004        2.000        3.002              2    1000.084    1500.991    MAIN_SUB@0
+    3   20.056        5.008        1.004        1.004              1    1003.995    1003.995    MAIN_SUB2@1
+    4   20.017        6.010        1.002        1.002              1    1002.034    1002.034    MAIN_SUB2@3
+    5   20.012        7.012        1.002        1.002              1    1001.809    1001.809    MAIN_SUB2@0
+    6   19.979        8.012        1.000        1.000              1    1000.149    1000.149    MAIN_SUB2@2
+    7   19.978        9.012        1.000        2.004              1    1000.063    2004.060    MAIN_SUB@1
+    8   19.978       10.012        1.000        2.002              1    1000.063    2002.101    MAIN_SUB@3
+    9   19.977       11.012        1.000        2.000              1    1000.038    2000.191    MAIN_SUB@2
+    10   0.002       11.012        0.000        0.000              2       0.052       0.052    __vernier__@2
+    11   0.002       11.012        0.000        0.000              2       0.052       0.052    __vernier__@1
+    12   0.002       11.013        0.000        0.000              2       0.052       0.052    __vernier__@3
+    13   0.001       11.013        0.000        0.000              4       0.007       0.007    __vernier__@0
diff --git a/post-processing/tests/data/vernier-output/vernier-output-test-0 b/post-processing/tests/data/vernier-output/vernier-output-test-0
index 8a4d3d78..64d1fc79 100644
--- a/post-processing/tests/data/vernier-output/vernier-output-test-0
+++ b/post-processing/tests/data/vernier-output/vernier-output-test-0
@@ -6,5 +6,5 @@ Profiling on 1 thread(s).
                                                                              (Size; Size/sec; Size/call; MinSize; MaxSize)
         (self)        (sec)        (sec)        (sec)                    ms/call   ms/call
 
-    1   44.130        2.583        2.583        5.854              1    2583.167    5853.527    __test_app__
-    2   34.563        4.606        2.023        2.077              2    1011.582    1038.697    some_process
+    1   44.130        2.583        2.583        5.854              1    2583.167    5853.527    __test_app__@0
+    2   34.563        4.606        2.023        2.077              2    1011.582    1038.697    some_process@0
diff --git a/post-processing/tests/data/vernier-output/vernier-output-test-1 b/post-processing/tests/data/vernier-output/vernier-output-test-1
index 11292d19..d7ffaa91 100644
--- a/post-processing/tests/data/vernier-output/vernier-output-test-1
+++ b/post-processing/tests/data/vernier-output/vernier-output-test-1
@@ -6,5 +6,5 @@ Profiling on 1 thread(s).
                                                                              (Size; Size/sec; Size/call; MinSize; MaxSize)
         (self)        (sec)        (sec)        (sec)                    ms/call   ms/call
 
-    1   43.991        3.009        3.009        3.069              2    1504.362    1534.740    some_process
-    2   37.835        5.596        2.588        6.839              1    2587.638    6839.336    __test_app__
+    1   43.991        3.009        3.009        3.069              2    1504.362    1534.740    some_process@0
+    2   37.835        5.596        2.588        6.839              1    2587.638    6839.336    __test_app__@0
diff --git a/post-processing/tests/test_vernier_caliper.py b/post-processing/tests/test_vernier_caliper.py
index 83ab3f1f..3eefc440 100644
--- a/post-processing/tests/test_vernier_caliper.py
+++ b/post-processing/tests/test_vernier_caliper.py
@@ -13,16 +13,16 @@
 class TestVernierCalliper(unittest.TestCase):
 
     def setUp(self):
-        self.calliper_a = VernierCalliper("test_calliper_a", 2)
-        self.calliper_b = VernierCalliper("test_calliper_b", 2)
+        self.calliper_a = VernierCalliper("test_calliper_a", 2, 1)
+        self.calliper_b = VernierCalliper("test_calliper_b", 2, 1)
 
     def test_init(self):
         self.assertEqual(self.calliper_a.name, "test_calliper_a")
-        self.assertTrue(np.array_equal(self.calliper_a.time_percent, np.array([0.0, 0.0])))
-        self.assertTrue(np.array_equal(self.calliper_a.cumul_time, np.array([0.0, 0.0])))
-        self.assertTrue(np.array_equal(self.calliper_a.self_time, np.array([0.0, 0.0])))
-        self.assertTrue(np.array_equal(self.calliper_a.total_time, np.array([0.0, 0.0])))
-        self.assertTrue(np.array_equal(self.calliper_a.n_calls, np.array([0.0, 0.0])))
+        self.assertTrue(np.array_equal(self.calliper_a.time_percent.flatten(), np.array([0.0, 0.0])))
+        self.assertTrue(np.array_equal(self.calliper_a.cumul_time.flatten(), np.array([0.0, 0.0])))
+        self.assertTrue(np.array_equal(self.calliper_a.self_time.flatten(), np.array([0.0, 0.0])))
+        self.assertTrue(np.array_equal(self.calliper_a.total_time.flatten(), np.array([0.0, 0.0])))
+        self.assertTrue(np.array_equal(self.calliper_a.n_calls.flatten(), np.array([0.0, 0.0])))
 
     def test_reduce(self):
         self.calliper_a.time_percent = np.array([10.0, 20.0])
diff --git a/post-processing/tests/test_vernier_data.py b/post-processing/tests/test_vernier_data.py
index 2ec435df..a026004f 100644
--- a/post-processing/tests/test_vernier_data.py
+++ b/post-processing/tests/test_vernier_data.py
@@ -20,40 +20,40 @@ def setUp(self):
         self.test_data = VernierData()
 
     def test_add_empty_calliper(self):
-        self.test_data.add_calliper("test_calliper", 1)
+        self.test_data.add_calliper("test_calliper", 1, 1)
         self.assertIn("test_calliper", self.test_data.data)
-        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].time_percent, np.array([0.0])))
-        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].cumul_time, np.array([0.0])))
-        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].self_time, np.array([0.0])))
-        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].total_time, np.array([0.0])))
-        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].n_calls, np.array([0.0])))
+        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].time_percent.flatten(), np.array([0.0])))
+        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].cumul_time.flatten(), np.array([0.0])))
+        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].self_time.flatten(), np.array([0.0])))
+        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].total_time.flatten(), np.array([0.0])))
+        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].n_calls.flatten(), np.array([0.0])))
 
     def test_filter_calliper(self):
-        self.test_data.add_calliper("timestep_calliper", 1)
-        self.test_data.add_calliper("other_calliper", 1)
+        self.test_data.add_calliper("timestep_calliper", 1, 1)
+        self.test_data.add_calliper("other_calliper", 1, 1)
         filtered = self.test_data.filter(["timestep"])
         self.assertIn("timestep_calliper", filtered.data)
         self.assertNotIn("other_calliper", filtered.data)
 
     def test_filter_no_match(self):
-        self.test_data.add_calliper("timestep_calliper", 1)
+        self.test_data.add_calliper("timestep_calliper", 1, 1)
         with self.assertRaises(ValueError):
             self.test_data.filter(["nonexistent"])
 
     def test_filter_multiple_matches(self):
-        self.test_data.add_calliper("timestep_calliper_1", 1)
-        self.test_data.add_calliper("timestep_calliper_2", 1)
+        self.test_data.add_calliper("timestep_calliper_1", 1, 1)
+        self.test_data.add_calliper("timestep_calliper_2", 1, 1)
         filtered = self.test_data.filter(["timestep"])
         self.assertIn("timestep_calliper_1", filtered.data)
         self.assertIn("timestep_calliper_2", filtered.data)
 
     def test_filter_empty_keys(self):
-        self.test_data.add_calliper("timestep_calliper", 1)
+        self.test_data.add_calliper("timestep_calliper", 1, 1)
         with self.assertRaises(ValueError):
             self.test_data.filter([])
 
     def test_write_txt_output_file(self):
-        self.test_data.add_calliper("test_calliper", 2)
+        self.test_data.add_calliper("test_calliper", 2, 1)
         self.test_data.data["test_calliper"].time_percent = np.array([10.0, 20.0])
         self.test_data.data["test_calliper"].cumul_time = np.array([30.0, 40.0])
         self.test_data.data["test_calliper"].self_time = np.array([5.0, 15.0])
@@ -67,7 +67,7 @@ def test_write_txt_output_file(self):
             self.assertEqual("| test_calliper |           30.0 |         10.0 |           35.0 |         2 |     15.0 |              15.0 |", contents[1])
 
     def test_write_txt_output_terminal(self):
-        self.test_data.add_calliper("test_calliper", 2)
+        self.test_data.add_calliper("test_calliper", 2, 1)
         self.test_data.data["test_calliper"].time_percent = np.array([50.0, 40.0])
         self.test_data.data["test_calliper"].cumul_time = np.array([10.0, 12.0])
         self.test_data.data["test_calliper"].self_time = np.array([3.0, 4.0])
diff --git a/post-processing/tests/test_vernier_reader.py b/post-processing/tests/test_vernier_reader.py
index e1d465ca..dc390797 100644
--- a/post-processing/tests/test_vernier_reader.py
+++ b/post-processing/tests/test_vernier_reader.py
@@ -30,33 +30,55 @@ def test_load_from_file(self):
         loaded_data = test_reader.load()
         self.assertIn("__test_app__", loaded_data.data)
         self.assertIn("some_process", loaded_data.data)
-        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].n_calls, np.array([1, 1])))
-        self.assertTrue(np.array_equal(loaded_data.data["some_process"].n_calls, np.array([2, 2])))
-        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].time_percent, np.array([44.130, 37.835])))
-        self.assertTrue(np.array_equal(loaded_data.data["some_process"].time_percent, np.array([34.563, 43.991])))
-        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].cumul_time, np.array([2.583, 5.596])))
-        self.assertTrue(np.array_equal(loaded_data.data["some_process"].cumul_time, np.array([4.606, 3.009])))
-        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].self_time, np.array([2.583, 2.588])))
-        self.assertTrue(np.array_equal(loaded_data.data["some_process"].self_time, np.array([2.023, 3.009])))
-        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].total_time, np.array([5.854, 6.839])))
-        self.assertTrue(np.array_equal(loaded_data.data["some_process"].total_time, np.array([2.077, 3.069])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].n_calls.flatten(), np.array([1, 1])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].n_calls.flatten(), np.array([2, 2])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].time_percent.flatten(), np.array([44.130, 37.835])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].time_percent.flatten(), np.array([34.563, 43.991])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].cumul_time.flatten(), np.array([2.583, 5.596])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].cumul_time.flatten(), np.array([4.606, 3.009])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].self_time.flatten(), np.array([2.583, 2.588])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].self_time.flatten(), np.array([2.023, 3.009])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].total_time.flatten(), np.array([5.854, 6.839])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].total_time.flatten(), np.array([2.077, 3.069])))
 
     def test_load_from_directory(self):
         test_reader = VernierReader(self.test_data_dir / "vernier-output")
         loaded_data = test_reader.load()
         self.assertIn("__test_app__", loaded_data.data)
         self.assertIn("some_process", loaded_data.data)
-        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].n_calls, np.array([1, 1])))
-        self.assertTrue(np.array_equal(loaded_data.data["some_process"].n_calls, np.array([2, 2])))
-        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].time_percent, np.array([44.130, 37.835])))
-        self.assertTrue(np.array_equal(loaded_data.data["some_process"].time_percent, np.array([34.563, 43.991])))
-        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].cumul_time, np.array([2.583, 5.596])))
-        self.assertTrue(np.array_equal(loaded_data.data["some_process"].cumul_time, np.array([4.606, 3.009])))
-        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].self_time, np.array([2.583, 2.588])))
-        self.assertTrue(np.array_equal(loaded_data.data["some_process"].self_time, np.array([2.023, 3.009])))
-        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].total_time, np.array([5.854, 6.839])))
-        self.assertTrue(np.array_equal(loaded_data.data["some_process"].total_time, np.array([2.077, 3.069])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].n_calls.flatten(), np.array([1, 1])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].n_calls.flatten(), np.array([2, 2])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].time_percent.flatten(), np.array([44.130, 37.835])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].time_percent.flatten(), np.array([34.563, 43.991])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].cumul_time.flatten(), np.array([2.583, 5.596])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].cumul_time.flatten(), np.array([4.606, 3.009])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].self_time.flatten(), np.array([2.583, 2.588])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].self_time.flatten(), np.array([2.023, 3.009])))
+        self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].total_time.flatten(), np.array([5.854, 6.839])))
+        self.assertTrue(np.array_equal(loaded_data.data["some_process"].total_time.flatten(), np.array([2.077, 3.069])))
 
+    def test_load_threaded_data_from_directory(self):
+        test_reader = VernierReader(self.test_data_dir / "vernier-output-threaded")
+        loaded_data = test_reader.load()
+        self.assertIn("FULL", loaded_data.data)
+        self.assertIn("MAIN_SUB", loaded_data.data)
+        self.assertIn("MAIN_SUB2", loaded_data.data)
+
+        self.assertTrue(np.array_equal(loaded_data.data["FULL"].n_calls, np.array([[1], [1]])))
+        self.assertTrue(np.array_equal(loaded_data.data["FULL"].time_percent, np.array([[40.0], [40.031]])))
+        self.assertTrue(np.array_equal(loaded_data.data["FULL"].self_time, np.array([[2.0], [2.004]])))
+        self.assertTrue(np.array_equal(loaded_data.data["FULL"].total_time, np.array([[5.0], [5.006]])))
+        self.assertTrue(np.array_equal(loaded_data.data["FULL"].cumul_time, np.array([[4.0], [2.004]])))
+        self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB"].n_calls, np.array([[2, 1, 1, 1], [2, 1, 1, 1]])))
+        self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB"].time_percent, np.array([[40.0, 20.0, 20.0, 20.0], [39.956, 19.978, 19.977, 19.978]])))
+        self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB"].self_time, np.array([[2.0, 1.0, 1.0, 1.0], [2.0, 1.0, 1.0, 1.0]])))
+        self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB"].total_time, np.array([[3.0, 2.0, 2.0, 2.0], [3.002, 2.004, 2.0, 2.002]])))
+        self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB"].cumul_time, np.array([[2.0, 11.001, 10.001, 6.0], [4.004, 9.012, 11.012, 10.012]])))
+        self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB2"].n_calls, np.array([[1, 1, 1, 1], [1, 1, 1, 1]])))
+        self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB2"].time_percent, np.array([[20.0, 20.0, 20.0, 20.0], [20.012, 20.056, 19.979, 20.017]])))
+        self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB2"].self_time, np.array([[1.0, 1.0, 1.0, 1.0], [1.002, 1.004, 1.0, 1.002]])))
+        self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB2"].total_time, np.array([[1.0, 1.0, 1.0, 1.0], [1.002, 1.004, 1.0, 1.002]])))
+        self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB2"].cumul_time, np.array([[7.001, 9.001, 8.001, 5.0], [7.012, 5.008, 8.012, 6.01]])))
 
 if __name__ == '__main__':
     unittest.main()

From dfa8c81bce226329f50000bee8f0a1f87acc624f Mon Sep 17 00:00:00 2001
From: EdHone <edward.hone@metoffice.gov.uk>
Date: Mon, 16 Mar 2026 16:24:25 +0000
Subject: [PATCH 5/9] Re-instate tests and fix

---
 post-processing/tests/test_vernier_data.py | 73 ++++++++++++++++++++++
 post-processing/vernier/vernier_data.py    | 22 +++++--
 2 files changed, 89 insertions(+), 6 deletions(-)

diff --git a/post-processing/tests/test_vernier_data.py b/post-processing/tests/test_vernier_data.py
index a026004f..3e7f065c 100644
--- a/post-processing/tests/test_vernier_data.py
+++ b/post-processing/tests/test_vernier_data.py
@@ -83,5 +83,78 @@ def test_write_txt_output_terminal(self):
         self.assertEqual("| test_calliper |           35.0 |          3.5 |           11.0 |         2 |     45.0 |              17.5 |", write_output.getvalue().splitlines()[1])
 
 
+    def test_get(self):
+        data1 = VernierData()
+        data1.add_calliper("calliper_a", 2, 1)
+        data1.data["calliper_a"].time_percent = np.array([10.0, 20.0])
+        data1.data["calliper_a"].cumul_time = np.array([30.0, 40.0])
+        data1.data["calliper_a"].self_time = np.array([5.0, 15.0])
+        data1.data["calliper_a"].total_time = np.array([25.0, 35.0])
+        data1.data["calliper_a"].n_calls = np.array([2, 2])
+        self.assertEqual(len(data1.get("calliper_a")), 2)
+
+
+class TestVernierCollation(unittest.TestCase):
+    """
+    Tests for the VernierData Collation class.
+    """
+    def _add_data(self):
+        self.collation = VernierDataCollation()
+        data1 = VernierData()
+        data1.add_calliper("calliper_a", 2, 1)
+        data1.data["calliper_a"].time_percent = np.array([[10.0], [20.0]])
+        data1.data["calliper_a"].cumul_time = np.array([[30.0], [40.0]])
+        data1.data["calliper_a"].self_time = np.array([[5.0], [15.0]])
+        data1.data["calliper_a"].total_time = np.array([[25.0], [35.0]])
+        data1.data["calliper_a"].n_calls = np.array([[2], [2]])
+
+        data2 = VernierData()
+        data2.add_calliper("calliper_a", 2, 1)
+        data2.data["calliper_a"].time_percent = np.array([[15.0], [25.0]])
+        data2.data["calliper_a"].cumul_time = np.array([[35.0], [45.0]])
+        data2.data["calliper_a"].self_time = np.array([[6.0], [16.0]])
+        data2.data["calliper_a"].total_time = np.array([[28.0], [38.0]])
+        data2.data["calliper_a"].n_calls = np.array([[3], [3]])
+
+        self.collation.add_data('test1', data1)
+        self.collation.add_data('test2', data2)
+        
+    def test_add_data(self):
+        self._add_data()
+        self.assertEqual(len(self.collation), 2)
+
+    def test_remove_data(self):
+        self._add_data()
+        self.collation.remove_data('test1')
+        self.assertEqual(len(self.collation), 1)
+
+    def test_get(self):
+        self._add_data()
+        calliper_a = self.collation.get("calliper_a")
+        self.assertEqual(len(calliper_a), 4)
+
+    def test_internal_consistency(self):
+        self._add_data()
+        data_inc = VernierData()
+        data_inc.add_calliper("calliper_a", 2, 1)
+        data_inc.data["calliper_a"].time_percent = np.array([[10.0], [20.0]])
+        data_inc.data["calliper_a"].cumul_time = np.array([[30.0], [40.0]])
+        data_inc.data["calliper_a"].self_time = np.array([[5.0], [15.0]])
+        data_inc.data["calliper_a"].total_time = np.array([[25.0], [35.0]])
+        data_inc.data["calliper_a"].n_calls = np.array([[2], [2]])
+
+        data_inc.add_calliper("calliper_b", 2, 1)
+        data_inc.data["calliper_b"].time_percent = np.array([[15.0], [25.0]])
+        data_inc.data["calliper_b"].cumul_time = np.array([[35.0], [45.0]])
+        data_inc.data["calliper_b"].self_time = np.array([[6.0], [16.0]])
+        data_inc.data["calliper_b"].total_time = np.array([[28.0], [38.0]])
+        data_inc.data["calliper_b"].n_calls = np.array([[3], [3]])
+
+        with self.assertRaises(ValueError) as test_exception:
+            self.collation.add_data('test3', data_inc)
+        self.assertEqual(str(test_exception.exception),
+                         "inconsistent callipers in new_vernier_data")
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/post-processing/vernier/vernier_data.py b/post-processing/vernier/vernier_data.py
index 82e24a61..96c8c172 100644
--- a/post-processing/vernier/vernier_data.py
+++ b/post-processing/vernier/vernier_data.py
@@ -17,11 +17,15 @@ class VernierCalliper():
     time_percent: np.ndarray
     cumul_time: np.ndarray
     n_calls: np.ndarray
+    n_ranks: int
+    n_threads: int
     name: str
 
     def __init__(self, name: str, n_ranks: int, n_threads: int):
 
         self.name = name
+        self.ranks = n_ranks
+        self.threads = n_threads
         self.time_percent = np.zeros((n_ranks, n_threads))
         self.cumul_time = np.zeros((n_ranks, n_threads))
         self.self_time = np.zeros((n_ranks, n_threads))
@@ -215,12 +219,18 @@ def get(self, calliper_key):
         if calliper_key not in self.calliper_list():
             return None
         self.internal_consistency()
-        results = VernierCalliper(calliper_key)
+
+        n_ranks = sum([vdata.data[calliper_key].ranks for _, vdata in self.vernier_data.items()])
+        n_threads = max([vdata.data[calliper_key].threads for _, vdata in self.vernier_data.items()])
+        results = VernierCalliper(calliper_key, n_ranks, n_threads)
+        rank_begin_index = 0
         for akey, vdata in self.vernier_data.items():
-            results.total_time += vdata.data[calliper_key].total_time
-            results.time_percent += vdata.data[calliper_key].time_percent
-            results.self_time += vdata.data[calliper_key].self_time
-            results.cumul_time += vdata.data[calliper_key].cumul_time
-            results.n_calls += vdata.data[calliper_key].n_calls
+            rank_end_index = rank_begin_index + vdata.data[calliper_key].ranks
+            results.total_time[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].total_time
+            results.time_percent[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].time_percent
+            results.self_time[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].self_time
+            results.cumul_time[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].cumul_time
+            results.n_calls[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].n_calls
+            rank_begin_index = rank_end_index
 
         return results

From 6bad072691e7f396f3895b90db8de02534cd2352 Mon Sep 17 00:00:00 2001
From: EdHone <edward.hone@metoffice.gov.uk>
Date: Mon, 16 Mar 2026 16:30:13 +0000
Subject: [PATCH 6/9] Fix txt output

---
 post-processing/vernier/vernier_data.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/post-processing/vernier/vernier_data.py b/post-processing/vernier/vernier_data.py
index 96c8c172..7dd91f48 100644
--- a/post-processing/vernier/vernier_data.py
+++ b/post-processing/vernier/vernier_data.py
@@ -61,7 +61,7 @@ def reduce(self) -> list:
     @classmethod
     def labels(self):
         return ["Routine", "Total time (s)", "Self (s)", "Cumul time (s)",
-                "No. calls", "% time", "Time per call (s)"]
+                "Max no. calls", "% time", "Time per call (s)"]
 
     def __lt__(self, other):
         """Comparison method for sorting callipers by self time."""
@@ -120,11 +120,14 @@ def write_txt_output(self, txt_path: Optional[Path] = None):
         it is printed to the terminal."""
 
         txt_table = []
+        labels = None
         for calliper in self.data.keys():
             txt_table.append(self.data[calliper].reduce())
+            if labels is None:
+                labels = self.data[calliper].labels()
         txt_table = sorted(txt_table, key=lambda x: x[2], reverse=True) # sort by self time, descending
 
-        txt_table.insert(0, ["Routine", "Total time (s)", "Self (s)", "Cumul time (s)", "No. calls", "% time", "Time per call (s)"])
+        txt_table.insert(0, labels)
 
         max_calliper_len = max([len(line[0]) for line in txt_table])
 
@@ -135,7 +138,7 @@ def write_txt_output(self, txt_path: Optional[Path] = None):
             out = open(txt_path, 'w')
 
         for row in txt_table:
-            out.write('| {:>{}} | {:>14} | {:>12} | {:>14} | {:>9} | {:>8} | {:>17} |\n'.format(row[0], max_calliper_len, *row[1:]))
+            out.write('| {:>{}} | {:>14} | {:>12} | {:>14} | {:>13} | {:>8} | {:>17} |\n'.format(row[0], max_calliper_len, *row[1:]))
 
         if txt_path is not None:
             out.close()

From 1c6025b039cb567536f622e639c50198f4389154 Mon Sep 17 00:00:00 2001
From: EdHone <edward.hone@metoffice.gov.uk>
Date: Tue, 17 Mar 2026 09:48:54 +0000
Subject: [PATCH 7/9] updated array initialisation to be filled with nans
 rather than zeroes

---
 post-processing/tests/test_cli_tools.py       |  6 +++---
 post-processing/tests/test_vernier_caliper.py | 10 +++++-----
 post-processing/tests/test_vernier_data.py    | 13 ++++---------
 post-processing/vernier/vernier_data.py       | 12 ++++++------
 4 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/post-processing/tests/test_cli_tools.py b/post-processing/tests/test_cli_tools.py
index 9ecdeb53..ea42edf9 100644
--- a/post-processing/tests/test_cli_tools.py
+++ b/post-processing/tests/test_cli_tools.py
@@ -14,9 +14,9 @@ def setUp(self):
         self.tools_dir = Path(__file__).parent.parent / 'tools'
         self.test_data_dir = Path(__file__).parent / 'data'
         self.test_data_kgo = (
-            '|      Routine | Total time (s) |     Self (s) | Cumul time (s) | No. calls |   % time | Time per call (s) |\n' +
-            '| __test_app__ |         6.3465 |       2.5855 |         4.0895 |         1 |  40.9825 |            6.3465 |\n' +
-            '| some_process |          2.573 |        2.516 |         3.8075 |         2 |   39.277 |            1.2865 |\n'
+            '|      Routine | Total time (s) |     Self (s) | Cumul time (s) | Max no. calls |   % time | Time per call (s) |\n' +
+            '| __test_app__ |         6.3465 |       2.5855 |         4.0895 |             1 |  40.9825 |            6.3465 |\n' +
+            '| some_process |          2.573 |        2.516 |         3.8075 |             2 |   39.277 |            1.2865 |\n'
             )
         return super().setUp()
 
diff --git a/post-processing/tests/test_vernier_caliper.py b/post-processing/tests/test_vernier_caliper.py
index 3eefc440..3cccab62 100644
--- a/post-processing/tests/test_vernier_caliper.py
+++ b/post-processing/tests/test_vernier_caliper.py
@@ -18,11 +18,11 @@ def setUp(self):
 
     def test_init(self):
         self.assertEqual(self.calliper_a.name, "test_calliper_a")
-        self.assertTrue(np.array_equal(self.calliper_a.time_percent.flatten(), np.array([0.0, 0.0])))
-        self.assertTrue(np.array_equal(self.calliper_a.cumul_time.flatten(), np.array([0.0, 0.0])))
-        self.assertTrue(np.array_equal(self.calliper_a.self_time.flatten(), np.array([0.0, 0.0])))
-        self.assertTrue(np.array_equal(self.calliper_a.total_time.flatten(), np.array([0.0, 0.0])))
-        self.assertTrue(np.array_equal(self.calliper_a.n_calls.flatten(), np.array([0.0, 0.0])))
+        self.assertEqual(self.calliper_a.time_percent.shape, (2, 1))
+        self.assertEqual(self.calliper_a.cumul_time.shape, (2, 1))
+        self.assertEqual(self.calliper_a.self_time.shape, (2, 1))
+        self.assertEqual(self.calliper_a.total_time.shape, (2, 1))
+        self.assertEqual(self.calliper_a.n_calls.shape, (2, 1))
 
     def test_reduce(self):
         self.calliper_a.time_percent = np.array([10.0, 20.0])
diff --git a/post-processing/tests/test_vernier_data.py b/post-processing/tests/test_vernier_data.py
index 3e7f065c..d045d155 100644
--- a/post-processing/tests/test_vernier_data.py
+++ b/post-processing/tests/test_vernier_data.py
@@ -22,11 +22,6 @@ def setUp(self):
     def test_add_empty_calliper(self):
         self.test_data.add_calliper("test_calliper", 1, 1)
         self.assertIn("test_calliper", self.test_data.data)
-        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].time_percent.flatten(), np.array([0.0])))
-        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].cumul_time.flatten(), np.array([0.0])))
-        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].self_time.flatten(), np.array([0.0])))
-        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].total_time.flatten(), np.array([0.0])))
-        self.assertTrue(np.array_equal(self.test_data.data["test_calliper"].n_calls.flatten(), np.array([0.0])))
 
     def test_filter_calliper(self):
         self.test_data.add_calliper("timestep_calliper", 1, 1)
@@ -63,8 +58,8 @@ def test_write_txt_output_file(self):
         with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
             self.test_data.write_txt_output(Path(tmp_file.name))
             contents = Path(tmp_file.name).read_text().splitlines()
-            self.assertEqual("|       Routine | Total time (s) |     Self (s) | Cumul time (s) | No. calls |   % time | Time per call (s) |", contents[0])
-            self.assertEqual("| test_calliper |           30.0 |         10.0 |           35.0 |         2 |     15.0 |              15.0 |", contents[1])
+            self.assertEqual("|       Routine | Total time (s) |     Self (s) | Cumul time (s) | Max no. calls |   % time | Time per call (s) |", contents[0])
+            self.assertEqual("| test_calliper |           30.0 |         10.0 |           35.0 |             2 |     15.0 |              15.0 |", contents[1])
 
     def test_write_txt_output_terminal(self):
         self.test_data.add_calliper("test_calliper", 2, 1)
@@ -79,8 +74,8 @@ def test_write_txt_output_terminal(self):
         self.test_data.write_txt_output()
         sys.stdout = sys.__stdout__
 
-        self.assertEqual("|       Routine | Total time (s) |     Self (s) | Cumul time (s) | No. calls |   % time | Time per call (s) |", write_output.getvalue().splitlines()[0])
-        self.assertEqual("| test_calliper |           35.0 |          3.5 |           11.0 |         2 |     45.0 |              17.5 |", write_output.getvalue().splitlines()[1])
+        self.assertEqual("|       Routine | Total time (s) |     Self (s) | Cumul time (s) | Max no. calls |   % time | Time per call (s) |", write_output.getvalue().splitlines()[0])
+        self.assertEqual("| test_calliper |           35.0 |          3.5 |           11.0 |             2 |     45.0 |              17.5 |", write_output.getvalue().splitlines()[1])
 
 
     def test_get(self):
diff --git a/post-processing/vernier/vernier_data.py b/post-processing/vernier/vernier_data.py
index 7dd91f48..82b5c2b3 100644
--- a/post-processing/vernier/vernier_data.py
+++ b/post-processing/vernier/vernier_data.py
@@ -21,16 +21,16 @@ class VernierCalliper():
     n_threads: int
     name: str
 
-    def __init__(self, name: str, n_ranks: int, n_threads: int):
+    def __init__(self, name: str, n_ranks: int, n_threads: int = 1):
 
         self.name = name
         self.ranks = n_ranks
         self.threads = n_threads
-        self.time_percent = np.zeros((n_ranks, n_threads))
-        self.cumul_time = np.zeros((n_ranks, n_threads))
-        self.self_time = np.zeros((n_ranks, n_threads))
-        self.total_time = np.zeros((n_ranks, n_threads))
-        self.n_calls = np.zeros((n_ranks, n_threads), dtype=int)
+        self.time_percent = np.full((n_ranks, n_threads), np.nan)
+        self.cumul_time = np.full((n_ranks, n_threads), np.nan)
+        self.self_time = np.full((n_ranks, n_threads), np.nan)
+        self.total_time = np.full((n_ranks, n_threads), np.nan)
+        self.n_calls = np.full((n_ranks, n_threads), np.nan)
 
         return
 

From fe38db8e5246f27bf2b3921880b90e6c97f8fdfd Mon Sep 17 00:00:00 2001
From: EdHone <edward.hone@metoffice.gov.uk>
Date: Tue, 17 Mar 2026 10:03:19 +0000
Subject: [PATCH 8/9] Fix final issues

---
 post-processing/vernier/vernier_data.py | 29 ++++++++++++-------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/post-processing/vernier/vernier_data.py b/post-processing/vernier/vernier_data.py
index 82b5c2b3..5c3cc3e6 100644
--- a/post-processing/vernier/vernier_data.py
+++ b/post-processing/vernier/vernier_data.py
@@ -50,12 +50,14 @@ def reduce(self) -> list:
 
         return [
             self.name.replace('@0', ''), # calliper name
-            round(self.total_time.mean(), 5), # mean total time across calls
-            round(self.self_time.mean(), 5), # mean self time across calls
-            round(self.cumul_time.mean(), 5), # mean cumulative time across calls
-            int(self.n_calls.max())    , # number of calls
-            round(self.time_percent.mean(), 5), # mean percentage of time across calls
-            round(np.mean(self.total_time / self.n_calls), 5) # mean time per call
+            # We use nanmean to avoid NaN's where array shapes are non-conformant
+            # due to collated data possibly having different numbers of threads
+            round(np.nanmean(self.total_time), 5), # mean total time across calls
+            round(np.nanmean(self.self_time), 5), # mean self time across calls
+            round(np.nanmean(self.cumul_time), 5), # mean cumulative time across calls
+            int(np.nanmax(self.n_calls))    , # number of calls
+            round(np.nanmean(self.time_percent), 5), # mean percentage of time across calls
+            round(np.nanmean(self.total_time / self.n_calls), 5) # mean time per call
         ]
 
     @classmethod
@@ -120,14 +122,11 @@ def write_txt_output(self, txt_path: Optional[Path] = None):
         it is printed to the terminal."""
 
         txt_table = []
-        labels = None
         for calliper in self.data.keys():
             txt_table.append(self.data[calliper].reduce())
-            if labels is None:
-                labels = self.data[calliper].labels()
         txt_table = sorted(txt_table, key=lambda x: x[2], reverse=True) # sort by self time, descending
 
-        txt_table.insert(0, labels)
+        txt_table.insert(0, VernierCalliper.labels())
 
         max_calliper_len = max([len(line[0]) for line in txt_table])
 
@@ -143,7 +142,7 @@ def write_txt_output(self, txt_path: Optional[Path] = None):
         if txt_path is not None:
             out.close()
 
-    def get(self, calliper_key):
+    def get(self, calliper_key) -> VernierCalliper:
         """
         Return a VernierCalliper of the data for this calliper_key,
         or None if it does not exist.
@@ -188,7 +187,7 @@ def internal_consistency(self, new_vernier_data=None):
         """
         # notImplemented enforce consistent sizing of members?? needed?
         callipers = []
-        for k, vdata in self.vernier_data.items():
+        for _, vdata in self.vernier_data.items():
             loop_callipers = sorted(list(vdata.data.keys()))
             if len(callipers) == 0:
                 callipers = loop_callipers
@@ -208,12 +207,12 @@ def calliper_list(self):
         result = []
         self.internal_consistency()
 
-        for k, vdata in self.vernier_data.items():
+        for _, vdata in self.vernier_data.items():
             result = sorted(list(vdata.data.keys()))
             break
         return result
 
-    def get(self, calliper_key):
+    def get(self, calliper_key) -> VernierCalliper:
         """
         Return a VernierCalliper of all the data from all collation members
         for this calliper_key, or None if it does not exist.
@@ -227,7 +226,7 @@ def get(self, calliper_key):
         n_threads = max([vdata.data[calliper_key].threads for _, vdata in self.vernier_data.items()])
         results = VernierCalliper(calliper_key, n_ranks, n_threads)
         rank_begin_index = 0
-        for akey, vdata in self.vernier_data.items():
+        for _, vdata in self.vernier_data.items():
             rank_end_index = rank_begin_index + vdata.data[calliper_key].ranks
             results.total_time[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].total_time
             results.time_percent[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].time_percent

From 94b4d2d0da22caabd32f19617a23cfe81c72bde7 Mon Sep 17 00:00:00 2001
From: EdHone <edward.hone@metoffice.gov.uk>
Date: Tue, 17 Mar 2026 10:30:40 +0000
Subject: [PATCH 9/9] Sign CLA

---
 CONTRIBUTORS.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 1f46c326..2de071e2 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -4,4 +4,5 @@
 |-----------------|------------------|-------------|------------|
 | andrewcoughtrie | Andrew Coughtrie | Met Office  | 2026-03-05 |
 | mo-marqh        | mark Hedley      | Met Office  | 2026-03-10 |
-| oakleybrunt     | Oakley Brunt     | Met Office  | 2026-03-09 |
\ No newline at end of file
+| oakleybrunt     | Oakley Brunt     | Met Office  | 2026-03-09 |
+| EdHone          | Ed Hone          | Met Office  | 2026-03-17 |
\ No newline at end of file