diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 1f46c326..2de071e2 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -4,4 +4,5 @@ |-----------------|------------------|-------------|------------| | andrewcoughtrie | Andrew Coughtrie | Met Office | 2026-03-05 | | mo-marqh | mark Hedley | Met Office | 2026-03-10 | -| oakleybrunt | Oakley Brunt | Met Office | 2026-03-09 | \ No newline at end of file +| oakleybrunt | Oakley Brunt | Met Office | 2026-03-09 | +| EdHone | Ed Hone | Met Office | 2026-03-17 | \ No newline at end of file diff --git a/post-processing/tests/data/vernier-output-test b/post-processing/tests/data/vernier-output-test index 29bc4c68..de812bf2 100644 --- a/post-processing/tests/data/vernier-output-test +++ b/post-processing/tests/data/vernier-output-test @@ -6,8 +6,8 @@ Profiling on 1 thread(s). (Size; Size/sec; Size/call; MinSize; MaxSize) (self) (sec) (sec) (sec) ms/call ms/call - 1 44.130 2.583 2.583 5.854 1 2583.167 5853.527 __test_app__ - 2 34.563 4.606 2.023 2.077 2 1011.582 1038.697 some_process + 1 44.130 2.583 2.583 5.854 1 2583.167 5853.527 __test_app__@0 + 2 34.563 4.606 2.023 2.077 2 1011.582 1038.697 some_process@0 Task 2 of 2 : MPI rank ID 1 Profiling on 1 thread(s). @@ -16,5 +16,5 @@ Profiling on 1 thread(s). (Size; Size/sec; Size/call; MinSize; MaxSize) (self) (sec) (sec) (sec) ms/call ms/call - 1 43.991 3.009 3.009 3.069 2 1504.362 1534.740 some_process - 2 37.835 5.596 2.588 6.839 1 2587.638 6839.336 __test_app__ + 1 43.991 3.009 3.009 3.069 2 1504.362 1534.740 some_process@0 + 2 37.835 5.596 2.588 6.839 1 2587.638 6839.336 __test_app__@0 diff --git a/post-processing/tests/data/vernier-output-threaded/vernier-output-0 b/post-processing/tests/data/vernier-output-threaded/vernier-output-0 new file mode 100644 index 00000000..888aabba --- /dev/null +++ b/post-processing/tests/data/vernier-output-threaded/vernier-output-0 @@ -0,0 +1,21 @@ + +Task 1 of 2 : MPI rank ID 0 +Profiling on 4 thread(s). + + # % Time Cumul Self Total # of calls Self Total Routine@ + (Size; Size/sec; Size/call; MinSize; MaxSize) + (self) (sec) (sec) (sec) ms/call ms/call + + 1 40.000 2.000 2.000 3.000 2 1000.075 1500.115 MAIN_SUB@0 + 2 40.000 4.000 2.000 5.000 1 2000.140 5000.373 FULL@0 + 3 20.000 5.000 1.000 1.000 1 1000.073 1000.073 MAIN_SUB2@3 + 4 20.000 6.000 1.000 2.000 1 1000.072 2000.152 MAIN_SUB@3 + 5 20.000 7.001 1.000 1.000 1 1000.072 1000.072 MAIN_SUB2@0 + 6 20.000 8.001 1.000 1.000 1 1000.071 1000.071 MAIN_SUB2@2 + 7 20.000 9.001 1.000 1.000 1 1000.069 1000.069 MAIN_SUB2@1 + 8 20.000 10.001 1.000 2.000 1 1000.068 2000.151 MAIN_SUB@2 + 9 20.000 11.001 1.000 2.000 1 1000.067 2000.150 MAIN_SUB@1 + 10 0.000 11.001 0.000 0.000 2 0.008 0.008 __vernier__@2 + 11 0.000 11.001 0.000 0.000 4 0.004 0.004 __vernier__@0 + 12 0.000 11.001 0.000 0.000 2 0.008 0.008 __vernier__@1 + 13 0.000 11.001 0.000 0.000 2 0.007 0.007 __vernier__@3 diff --git a/post-processing/tests/data/vernier-output-threaded/vernier-output-1 b/post-processing/tests/data/vernier-output-threaded/vernier-output-1 new file mode 100644 index 00000000..763a4f58 --- /dev/null +++ b/post-processing/tests/data/vernier-output-threaded/vernier-output-1 @@ -0,0 +1,21 @@ + +Task 2 of 2 : MPI rank ID 1 +Profiling on 4 thread(s). + + # % Time Cumul Self Total # of calls Self Total Routine@ + (Size; Size/sec; Size/call; MinSize; MaxSize) + (self) (sec) (sec) (sec) ms/call ms/call + + 1 40.031 2.004 2.004 5.006 1 2003.948 5005.940 FULL@0 + 2 39.956 4.004 2.000 3.002 2 1000.084 1500.991 MAIN_SUB@0 + 3 20.056 5.008 1.004 1.004 1 1003.995 1003.995 MAIN_SUB2@1 + 4 20.017 6.010 1.002 1.002 1 1002.034 1002.034 MAIN_SUB2@3 + 5 20.012 7.012 1.002 1.002 1 1001.809 1001.809 MAIN_SUB2@0 + 6 19.979 8.012 1.000 1.000 1 1000.149 1000.149 MAIN_SUB2@2 + 7 19.978 9.012 1.000 2.004 1 1000.063 2004.060 MAIN_SUB@1 + 8 19.978 10.012 1.000 2.002 1 1000.063 2002.101 MAIN_SUB@3 + 9 19.977 11.012 1.000 2.000 1 1000.038 2000.191 MAIN_SUB@2 + 10 0.002 11.012 0.000 0.000 2 0.052 0.052 __vernier__@2 + 11 0.002 11.012 0.000 0.000 2 0.052 0.052 __vernier__@1 + 12 0.002 11.013 0.000 0.000 2 0.052 0.052 __vernier__@3 + 13 0.001 11.013 0.000 0.000 4 0.007 0.007 __vernier__@0 diff --git a/post-processing/tests/data/vernier-output/vernier-output-test-0 b/post-processing/tests/data/vernier-output/vernier-output-test-0 index 8a4d3d78..64d1fc79 100644 --- a/post-processing/tests/data/vernier-output/vernier-output-test-0 +++ b/post-processing/tests/data/vernier-output/vernier-output-test-0 @@ -6,5 +6,5 @@ Profiling on 1 thread(s). (Size; Size/sec; Size/call; MinSize; MaxSize) (self) (sec) (sec) (sec) ms/call ms/call - 1 44.130 2.583 2.583 5.854 1 2583.167 5853.527 __test_app__ - 2 34.563 4.606 2.023 2.077 2 1011.582 1038.697 some_process + 1 44.130 2.583 2.583 5.854 1 2583.167 5853.527 __test_app__@0 + 2 34.563 4.606 2.023 2.077 2 1011.582 1038.697 some_process@0 diff --git a/post-processing/tests/data/vernier-output/vernier-output-test-1 b/post-processing/tests/data/vernier-output/vernier-output-test-1 index 11292d19..d7ffaa91 100644 --- a/post-processing/tests/data/vernier-output/vernier-output-test-1 +++ b/post-processing/tests/data/vernier-output/vernier-output-test-1 @@ -6,5 +6,5 @@ Profiling on 1 thread(s). (Size; Size/sec; Size/call; MinSize; MaxSize) (self) (sec) (sec) (sec) ms/call ms/call - 1 43.991 3.009 3.009 3.069 2 1504.362 1534.740 some_process - 2 37.835 5.596 2.588 6.839 1 2587.638 6839.336 __test_app__ + 1 43.991 3.009 3.009 3.069 2 1504.362 1534.740 some_process@0 + 2 37.835 5.596 2.588 6.839 1 2587.638 6839.336 __test_app__@0 diff --git a/post-processing/tests/test_cli_tools.py b/post-processing/tests/test_cli_tools.py index 4607cd18..66b477d2 100644 --- a/post-processing/tests/test_cli_tools.py +++ b/post-processing/tests/test_cli_tools.py @@ -23,9 +23,9 @@ def setUp(self): self.test_data_dir = Path(__file__).parent / 'data' # pylint: disable=line-too-long self.test_data_kgo = ( - '| Routine | Total time (s) | Self (s) | Cumul time (s) | No. calls | % time | Time per call (s) |\n' + - '| __test_app__ | 6.3465 | 2.5855 | 4.0895 | 1 | 40.9825 | 6.3465 |\n' + - '| some_process | 2.573 | 2.516 | 3.8075 | 2 | 39.277 | 1.2865 |\n' + '| Routine | Total time (s) | Self (s) | Cumul time (s) | Max no. calls | % time | Time per call (s) |\n' + + '| __test_app__ | 6.3465 | 2.5855 | 4.0895 | 1 | 40.9825 | 6.3465 |\n' + + '| some_process | 2.573 | 2.516 | 3.8075 | 2 | 39.277 | 1.2865 |\n' ) return super().setUp() diff --git a/post-processing/tests/test_vernier_calliper.py b/post-processing/tests/test_vernier_calliper.py index 3f9bf916..570ae1dd 100644 --- a/post-processing/tests/test_vernier_calliper.py +++ b/post-processing/tests/test_vernier_calliper.py @@ -8,6 +8,7 @@ """ import unittest +import numpy as np from pathlib import Path import sys @@ -24,30 +25,29 @@ def setUp(self): """ Initialise useful attributes for testing. """ - self.calliper_a = VernierCalliper("test_calliper_a") - self.calliper_b = VernierCalliper("test_calliper_b") + self.calliper_a = VernierCalliper("test_calliper_a", 2, 1) + self.calliper_b = VernierCalliper("test_calliper_b", 2, 1) def test_init(self): """ Test that the class initialises as expected (empty lists) """ self.assertEqual(self.calliper_a.name, "test_calliper_a") - self.assertEqual(self.calliper_a.time_percent, []) - self.assertEqual(self.calliper_a.cumul_time, []) - self.assertEqual(self.calliper_a.self_time, []) - self.assertEqual(self.calliper_a.total_time, []) - self.assertEqual(self.calliper_a.n_calls, []) + self.assertEqual(self.calliper_a.time_percent.shape, (2, 1)) + self.assertEqual(self.calliper_a.cumul_time.shape, (2, 1)) + self.assertEqual(self.calliper_a.self_time.shape, (2, 1)) + self.assertEqual(self.calliper_a.total_time.shape, (2, 1)) + self.assertEqual(self.calliper_a.n_calls.shape, (2, 1)) def test_reduce(self): """ Test that the reduce() method returns the expected results. """ - # pylint: disable=pointless-statement - self.calliper_a.time_percent = [10.0, 20.0] - self.calliper_a.cumul_time = [30.0, 40.0] - self.calliper_a.self_time = [5.0, 15.0] - self.calliper_a.total_time = [25.0, 35.0] - self.calliper_a.n_calls = [2, 2] + self.calliper_a.time_percent = np.array([10.0, 20.0]) + self.calliper_a.cumul_time = np.array([30.0, 40.0]) + self.calliper_a.self_time = np.array([5.0, 15.0]) + self.calliper_a.total_time = np.array([25.0, 35.0]) + self.calliper_a.n_calls = np.array([2, 2]) reduced_data = self.calliper_a.reduce() self.assertEqual(reduced_data[0], "test_calliper_a") @@ -63,18 +63,17 @@ def test_compare(self): Test that the comparison of dataclasses works as expected for the VernierCalliper class. """ - # pylint: disable=pointless-statement - self.calliper_a.time_percent = [10.0, 20.0] - self.calliper_a.cumul_time = [30.0, 40.0] - self.calliper_a.self_time = [5.0, 15.0] - self.calliper_a.total_time = [25.0, 35.0] - self.calliper_a.n_calls = [2, 2] + self.calliper_a.time_percent = np.array([10.0, 20.0]) + self.calliper_a.cumul_time = np.array([30.0, 40.0]) + self.calliper_a.self_time = np.array([5.0, 15.0]) + self.calliper_a.total_time = np.array([25.0, 35.0]) + self.calliper_a.n_calls = np.array([2, 2]) - self.calliper_b.time_percent = [12.0, 25.0] - self.calliper_b.cumul_time = [35.0, 46.0] - self.calliper_b.self_time = [6.0, 19.0] - self.calliper_b.total_time = [28.0, 39.0] - self.calliper_b.n_calls = [2, 2] + self.calliper_b.time_percent = np.array([12.0, 25.0]) + self.calliper_b.cumul_time = np.array([35.0, 46.0]) + self.calliper_b.self_time = np.array([6.0, 19.0]) + self.calliper_b.total_time = np.array([28.0, 39.0]) + self.calliper_b.n_calls = np.array([2, 2]) self.assertTrue(self.calliper_a < self.calliper_b) self.assertFalse(self.calliper_a > self.calliper_b) diff --git a/post-processing/tests/test_vernier_data.py b/post-processing/tests/test_vernier_data.py index 30164698..2deb984a 100644 --- a/post-processing/tests/test_vernier_data.py +++ b/post-processing/tests/test_vernier_data.py @@ -8,6 +8,7 @@ """ from pathlib import Path from io import StringIO +import numpy as np import tempfile import unittest import sys @@ -32,21 +33,16 @@ def test_add_empty_calliper(self): Tests that a VernierCalliper instance is correctly added to the VernierData instance. """ - self.test_data.add_calliper("test_calliper") + self.test_data.add_calliper("test_calliper", 1, 1) self.assertIn("test_calliper", self.test_data.data) - self.assertEqual(self.test_data.data["test_calliper"].time_percent, []) - self.assertEqual(self.test_data.data["test_calliper"].cumul_time, []) - self.assertEqual(self.test_data.data["test_calliper"].self_time, []) - self.assertEqual(self.test_data.data["test_calliper"].total_time, []) - self.assertEqual(self.test_data.data["test_calliper"].n_calls, []) def test_filter_calliper(self): """ Tests that callipers that do not match the pattern pased to `VernierData.filter()` are not returned. """ - self.test_data.add_calliper("timestep_calliper") - self.test_data.add_calliper("other_calliper") + self.test_data.add_calliper("timestep_calliper", 1, 1) + self.test_data.add_calliper("other_calliper", 1, 1) filtered = self.test_data.filter(["timestep"]) self.assertIn("timestep_calliper", filtered.data) self.assertNotIn("other_calliper", filtered.data) @@ -56,7 +52,7 @@ def test_filter_no_match(self): Test that an error is raised when no callipers are found when calling the `VernierData.filter()` method. """ - self.test_data.add_calliper("timestep_calliper") + self.test_data.add_calliper("timestep_calliper", 1, 1) with self.assertRaises(ValueError): self.test_data.filter(["nonexistent"]) @@ -65,8 +61,8 @@ def test_filter_multiple_matches(self): Tests that filter correctly returns all callipers with a substring equal to the pattern passed to `VernierData.filter()` """ - self.test_data.add_calliper("timestep_calliper_1") - self.test_data.add_calliper("timestep_calliper_2") + self.test_data.add_calliper("timestep_calliper_1", 1, 1) + self.test_data.add_calliper("timestep_calliper_2", 1, 1) filtered = self.test_data.filter(["timestep"]) self.assertIn("timestep_calliper_1", filtered.data) self.assertIn("timestep_calliper_2", filtered.data) @@ -76,7 +72,7 @@ def test_filter_empty_keys(self): Test that an error is raised when no filters are passed to the `VernierData.filter()` method. """ - self.test_data.add_calliper("timestep_calliper") + self.test_data.add_calliper("timestep_calliper", 1, 1) with self.assertRaises(ValueError): self.test_data.filter([]) @@ -85,32 +81,32 @@ def test_write_txt_output_file(self): Test that the formatting of write_txt_ouput is as expected when writing to a file. """ - self.test_data.add_calliper("test_calliper") - self.test_data.data["test_calliper"].time_percent = [10.0, 20.0] - self.test_data.data["test_calliper"].cumul_time = [30.0, 40.0] - self.test_data.data["test_calliper"].self_time = [5.0, 15.0] - self.test_data.data["test_calliper"].total_time = [25.0, 35.0] - self.test_data.data["test_calliper"].n_calls = [2] + self.test_data.add_calliper("test_calliper", 2, 1) + self.test_data.data["test_calliper"].time_percent = np.array([10.0, 20.0]) + self.test_data.data["test_calliper"].cumul_time = np.array([30.0, 40.0]) + self.test_data.data["test_calliper"].self_time = np.array([5.0, 15.0]) + self.test_data.data["test_calliper"].total_time = np.array([25.0, 35.0]) + self.test_data.data["test_calliper"].n_calls = np.array([2, 2]) # pylint: disable=unspecified-encoding with tempfile.NamedTemporaryFile(delete=False) as tmp_file: self.test_data.write_txt_output(Path(tmp_file.name)) contents = Path(tmp_file.name).read_text().splitlines() # pylint: disable=line-too-long - self.assertEqual("| Routine | Total time (s) | Self (s) | Cumul time (s) | No. calls | % time | Time per call (s) |", contents[0]) - self.assertEqual("| test_calliper | 30.0 | 10.0 | 35.0 | 2 | 15.0 | 15.0 |", contents[1]) + self.assertEqual("| Routine | Total time (s) | Self (s) | Cumul time (s) | Max no. calls | % time | Time per call (s) |", contents[0]) + self.assertEqual("| test_calliper | 30.0 | 10.0 | 35.0 | 2 | 15.0 | 15.0 |", contents[1]) def test_write_txt_output_terminal(self): """ Test that the formatting of write_txt_ouput is as expected when writing to the terminal. """ - self.test_data.add_calliper("test_calliper") - self.test_data.data["test_calliper"].time_percent = [50.0, 40.0] - self.test_data.data["test_calliper"].cumul_time = [10.0, 12.0] - self.test_data.data["test_calliper"].self_time = [3.0, 4.0] - self.test_data.data["test_calliper"].total_time = [15.0, 55.0] - self.test_data.data["test_calliper"].n_calls = [2] + self.test_data.add_calliper("test_calliper", 2, 1) + self.test_data.data["test_calliper"].time_percent = np.array([50.0, 40.0]) + self.test_data.data["test_calliper"].cumul_time = np.array([10.0, 12.0]) + self.test_data.data["test_calliper"].self_time = np.array([3.0, 4.0]) + self.test_data.data["test_calliper"].total_time = np.array([15.0, 55.0]) + self.test_data.data["test_calliper"].n_calls = np.array([2, 2]) write_output = StringIO() sys.stdout = write_output @@ -118,102 +114,21 @@ def test_write_txt_output_terminal(self): sys.stdout = sys.__stdout__ # pylint: disable=line-too-long - self.assertEqual("| Routine | Total time (s) | Self (s) | Cumul time (s) | No. calls | % time | Time per call (s) |", write_output.getvalue().splitlines()[0]) - self.assertEqual("| test_calliper | 35.0 | 3.5 | 11.0 | 2 | 45.0 | 17.5 |", write_output.getvalue().splitlines()[1]) + self.assertEqual("| Routine | Total time (s) | Self (s) | Cumul time (s) | Max no. calls | % time | Time per call (s) |", write_output.getvalue().splitlines()[0]) + self.assertEqual("| test_calliper | 35.0 | 3.5 | 11.0 | 2 | 45.0 | 17.5 |", write_output.getvalue().splitlines()[1]) - def test_aggregate(self): - """ - Tests that aggregate returns a single VernierData object with all - expected callipers and data. - """ - data1 = VernierData() - data1.add_calliper("calliper_a") - data1.data["calliper_a"].time_percent = [10.0, 20.0] - data1.data["calliper_a"].cumul_time = [30.0, 40.0] - data1.data["calliper_a"].self_time = [5.0, 15.0] - data1.data["calliper_a"].total_time = [25.0, 35.0] - data1.data["calliper_a"].n_calls = [2, 2] - - data2 = VernierData() - data2.add_calliper("calliper_a") - data2.data["calliper_a"].time_percent = [15.0, 25.0] - data2.data["calliper_a"].cumul_time = [35.0, 45.0] - data2.data["calliper_a"].self_time = [6.0, 16.0] - data2.data["calliper_a"].total_time = [28.0, 38.0] - data2.data["calliper_a"].n_calls = [3, 3] - - aggregated = VernierData() - aggregated.aggregate([data1, data2]) - self.assertIn("calliper_a", aggregated.data) - self.assertEqual( - aggregated.data["calliper_a"].time_percent, - [10.0, 20.0, 15.0, 25.0]) - self.assertEqual( - aggregated.data["calliper_a"].cumul_time, - [30.0, 40.0, 35.0, 45.0]) - self.assertEqual( - aggregated.data["calliper_a"].self_time, - [5.0, 15.0, 6.0, 16.0]) - self.assertEqual( - aggregated.data["calliper_a"].total_time, - [25.0, 35.0, 28.0, 38.0]) - self.assertEqual( - aggregated.data["calliper_a"].n_calls, - [2, 2, 3, 3]) - - def test_aggregate_inconsistent(self): - """ - Tests that an error is raised when VernierData objects with mismatched - callipers are passed to aggregate with internal consistency constraints - turned on. - """ - data1 = VernierData() - data1.add_calliper("calliper_a") - data1.data["calliper_a"].time_percent = [10.0, 20.0] - data1.data["calliper_a"].cumul_time = [30.0, 40.0] - data1.data["calliper_a"].self_time = [5.0, 15.0] - data1.data["calliper_a"].total_time = [25.0, 35.0] - data1.data["calliper_a"].n_calls = [2, 2] - - data2 = VernierData() - data2.add_calliper("calliper_b") - data2.data["calliper_b"].time_percent = [15.0, 25.0] - data2.data["calliper_b"].cumul_time = [35.0, 45.0] - data2.data["calliper_b"].self_time = [6.0, 16.0] - data2.data["calliper_b"].total_time = [28.0, 38.0] - data2.data["calliper_b"].n_calls = [3, 3] - - with self.assertRaises(ValueError): - aggregated = VernierData() - aggregated.aggregate([data1, data2]) - - def test_aggregate_inconsistent_ok(self): - """ - Test that combining VernierData objects without forced consistency - of callipers works as expected (both callipers present in output). - """ - data1 = VernierData() - data1.add_calliper("calliper_a") - - data2 = VernierData() - data2.add_calliper("calliper_b") - - aggregated = VernierData() - aggregated.aggregate([data1, data2], internal_consistency=False) - self.assertIn("calliper_a", aggregated.data) - self.assertIn("calliper_b", aggregated.data) def test_get(self): """ Test that the get method of the VernierData class works as expected. """ data1 = VernierData() - data1.add_calliper("calliper_a") - data1.data["calliper_a"].time_percent = [10.0, 20.0] - data1.data["calliper_a"].cumul_time = [30.0, 40.0] - data1.data["calliper_a"].self_time = [5.0, 15.0] - data1.data["calliper_a"].total_time = [25.0, 35.0] - data1.data["calliper_a"].n_calls = [2, 2] + data1.add_calliper("calliper_a", 2, 1) + data1.data["calliper_a"].time_percent = np.array([10.0, 20.0]) + data1.data["calliper_a"].cumul_time = np.array([30.0, 40.0]) + data1.data["calliper_a"].self_time = np.array([5.0, 15.0]) + data1.data["calliper_a"].total_time = np.array([25.0, 35.0]) + data1.data["calliper_a"].n_calls = np.array([2, 2]) self.assertEqual(len(data1.get("calliper_a")), 2) @@ -227,20 +142,20 @@ def _add_data(self): """ self.collation = VernierDataCollation() data1 = VernierData() - data1.add_calliper("calliper_a") - data1.data["calliper_a"].time_percent = [10.0, 20.0] - data1.data["calliper_a"].cumul_time = [30.0, 40.0] - data1.data["calliper_a"].self_time = [5.0, 15.0] - data1.data["calliper_a"].total_time = [25.0, 35.0] - data1.data["calliper_a"].n_calls = [2, 2] + data1.add_calliper("calliper_a", 2, 1) + data1.data["calliper_a"].time_percent = np.array([[10.0], [20.0]]) + data1.data["calliper_a"].cumul_time = np.array([[30.0], [40.0]]) + data1.data["calliper_a"].self_time = np.array([[5.0], [15.0]]) + data1.data["calliper_a"].total_time = np.array([[25.0], [35.0]]) + data1.data["calliper_a"].n_calls = np.array([[2], [2]]) data2 = VernierData() - data2.add_calliper("calliper_a") - data2.data["calliper_a"].time_percent = [15.0, 25.0] - data2.data["calliper_a"].cumul_time = [35.0, 45.0] - data2.data["calliper_a"].self_time = [6.0, 16.0] - data2.data["calliper_a"].total_time = [28.0, 38.0] - data2.data["calliper_a"].n_calls = [3, 3] + data2.add_calliper("calliper_a", 2, 1) + data2.data["calliper_a"].time_percent = np.array([[15.0], [25.0]]) + data2.data["calliper_a"].cumul_time = np.array([[35.0], [45.0]]) + data2.data["calliper_a"].self_time = np.array([[6.0], [16.0]]) + data2.data["calliper_a"].total_time = np.array([[28.0], [38.0]]) + data2.data["calliper_a"].n_calls = np.array([[3], [3]]) self.collation.add_data('test1', data1) self.collation.add_data('test2', data2) @@ -278,19 +193,19 @@ def test_internal_consistency(self): """ self._add_data() data_inc = VernierData() - data_inc.add_calliper("calliper_a") - data_inc.data["calliper_a"].time_percent = [10.0, 20.0] - data_inc.data["calliper_a"].cumul_time = [30.0, 40.0] - data_inc.data["calliper_a"].self_time = [5.0, 15.0] - data_inc.data["calliper_a"].total_time = [25.0, 35.0] - data_inc.data["calliper_a"].n_calls = [2, 2] - - data_inc.add_calliper("calliper_b") - data_inc.data["calliper_b"].time_percent = [15.0, 25.0] - data_inc.data["calliper_b"].cumul_time = [35.0, 45.0] - data_inc.data["calliper_b"].self_time = [6.0, 16.0] - data_inc.data["calliper_b"].total_time = [28.0, 38.0] - data_inc.data["calliper_b"].n_calls = [3, 3] + data_inc.add_calliper("calliper_a", 2, 1) + data_inc.data["calliper_a"].time_percent = np.array([[10.0], [20.0]]) + data_inc.data["calliper_a"].cumul_time = np.array([[30.0], [40.0]]) + data_inc.data["calliper_a"].self_time = np.array([[5.0], [15.0]]) + data_inc.data["calliper_a"].total_time = np.array([[25.0], [35.0]]) + data_inc.data["calliper_a"].n_calls = np.array([[2], [2]]) + + data_inc.add_calliper("calliper_b", 2, 1) + data_inc.data["calliper_b"].time_percent = np.array([[15.0], [25.0]]) + data_inc.data["calliper_b"].cumul_time = np.array([[35.0], [45.0]]) + data_inc.data["calliper_b"].self_time = np.array([[6.0], [16.0]]) + data_inc.data["calliper_b"].total_time = np.array([[28.0], [38.0]]) + data_inc.data["calliper_b"].n_calls = np.array([[3], [3]]) with self.assertRaises(ValueError) as test_exception: self.collation.add_data('test3', data_inc) diff --git a/post-processing/tests/test_vernier_reader.py b/post-processing/tests/test_vernier_reader.py index 66c4da57..3d3bd83a 100644 --- a/post-processing/tests/test_vernier_reader.py +++ b/post-processing/tests/test_vernier_reader.py @@ -5,6 +5,7 @@ # ------------------------------------------------------------------------------ from pathlib import Path import unittest +import numpy as np import sys # pylint: disable=wrong-import-position @@ -32,33 +33,55 @@ def test_load_from_file(self): loaded_data = test_reader.load() self.assertIn("__test_app__", loaded_data.data) self.assertIn("some_process", loaded_data.data) - self.assertEqual(loaded_data.data["__test_app__"].n_calls, [1, 1]) - self.assertEqual(loaded_data.data["some_process"].n_calls, [2, 2]) - self.assertEqual(loaded_data.data["__test_app__"].time_percent, [44.130, 37.835]) - self.assertEqual(loaded_data.data["some_process"].time_percent, [34.563, 43.991]) - self.assertEqual(loaded_data.data["__test_app__"].cumul_time, [2.583, 5.596]) - self.assertEqual(loaded_data.data["some_process"].cumul_time, [4.606, 3.009]) - self.assertEqual(loaded_data.data["__test_app__"].self_time, [2.583, 2.588]) - self.assertEqual(loaded_data.data["some_process"].self_time, [2.023, 3.009]) - self.assertEqual(loaded_data.data["__test_app__"].total_time, [5.854, 6.839]) - self.assertEqual(loaded_data.data["some_process"].total_time,[2.077, 3.069]) + self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].n_calls.flatten(), np.array([1, 1]))) + self.assertTrue(np.array_equal(loaded_data.data["some_process"].n_calls.flatten(), np.array([2, 2]))) + self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].time_percent.flatten(), np.array([44.130, 37.835]))) + self.assertTrue(np.array_equal(loaded_data.data["some_process"].time_percent.flatten(), np.array([34.563, 43.991]))) + self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].cumul_time.flatten(), np.array([2.583, 5.596]))) + self.assertTrue(np.array_equal(loaded_data.data["some_process"].cumul_time.flatten(), np.array([4.606, 3.009]))) + self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].self_time.flatten(), np.array([2.583, 2.588]))) + self.assertTrue(np.array_equal(loaded_data.data["some_process"].self_time.flatten(), np.array([2.023, 3.009]))) + self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].total_time.flatten(), np.array([5.854, 6.839]))) + self.assertTrue(np.array_equal(loaded_data.data["some_process"].total_time.flatten(), np.array([2.077, 3.069]))) def test_load_from_directory(self): test_reader = VernierReader(self.test_data_dir / "vernier-output") loaded_data = test_reader.load() self.assertIn("__test_app__", loaded_data.data) self.assertIn("some_process", loaded_data.data) - self.assertCountEqual(loaded_data.data["__test_app__"].n_calls, [1, 1]) - self.assertCountEqual(loaded_data.data["some_process"].n_calls, [2, 2]) - self.assertCountEqual(loaded_data.data["__test_app__"].time_percent, [44.130, 37.835]) - self.assertCountEqual(loaded_data.data["some_process"].time_percent, [34.563, 43.991]) - self.assertCountEqual(loaded_data.data["__test_app__"].cumul_time, [2.583, 5.596]) - self.assertCountEqual(loaded_data.data["some_process"].cumul_time, [4.606, 3.009]) - self.assertCountEqual(loaded_data.data["__test_app__"].self_time, [2.583, 2.588]) - self.assertCountEqual(loaded_data.data["some_process"].self_time, [2.023, 3.009]) - self.assertCountEqual(loaded_data.data["__test_app__"].total_time, [5.854, 6.839]) - self.assertCountEqual(loaded_data.data["some_process"].total_time,[2.077, 3.069]) + self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].n_calls.flatten(), np.array([1, 1]))) + self.assertTrue(np.array_equal(loaded_data.data["some_process"].n_calls.flatten(), np.array([2, 2]))) + self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].time_percent.flatten(), np.array([44.130, 37.835]))) + self.assertTrue(np.array_equal(loaded_data.data["some_process"].time_percent.flatten(), np.array([34.563, 43.991]))) + self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].cumul_time.flatten(), np.array([2.583, 5.596]))) + self.assertTrue(np.array_equal(loaded_data.data["some_process"].cumul_time.flatten(), np.array([4.606, 3.009]))) + self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].self_time.flatten(), np.array([2.583, 2.588]))) + self.assertTrue(np.array_equal(loaded_data.data["some_process"].self_time.flatten(), np.array([2.023, 3.009]))) + self.assertTrue(np.array_equal(loaded_data.data["__test_app__"].total_time.flatten(), np.array([5.854, 6.839]))) + self.assertTrue(np.array_equal(loaded_data.data["some_process"].total_time.flatten(), np.array([2.077, 3.069]))) + def test_load_threaded_data_from_directory(self): + test_reader = VernierReader(self.test_data_dir / "vernier-output-threaded") + loaded_data = test_reader.load() + self.assertIn("FULL", loaded_data.data) + self.assertIn("MAIN_SUB", loaded_data.data) + self.assertIn("MAIN_SUB2", loaded_data.data) + + self.assertTrue(np.array_equal(loaded_data.data["FULL"].n_calls, np.array([[1], [1]]))) + self.assertTrue(np.array_equal(loaded_data.data["FULL"].time_percent, np.array([[40.0], [40.031]]))) + self.assertTrue(np.array_equal(loaded_data.data["FULL"].self_time, np.array([[2.0], [2.004]]))) + self.assertTrue(np.array_equal(loaded_data.data["FULL"].total_time, np.array([[5.0], [5.006]]))) + self.assertTrue(np.array_equal(loaded_data.data["FULL"].cumul_time, np.array([[4.0], [2.004]]))) + self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB"].n_calls, np.array([[2, 1, 1, 1], [2, 1, 1, 1]]))) + self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB"].time_percent, np.array([[40.0, 20.0, 20.0, 20.0], [39.956, 19.978, 19.977, 19.978]]))) + self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB"].self_time, np.array([[2.0, 1.0, 1.0, 1.0], [2.0, 1.0, 1.0, 1.0]]))) + self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB"].total_time, np.array([[3.0, 2.0, 2.0, 2.0], [3.002, 2.004, 2.0, 2.002]]))) + self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB"].cumul_time, np.array([[2.0, 11.001, 10.001, 6.0], [4.004, 9.012, 11.012, 10.012]]))) + self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB2"].n_calls, np.array([[1, 1, 1, 1], [1, 1, 1, 1]]))) + self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB2"].time_percent, np.array([[20.0, 20.0, 20.0, 20.0], [20.012, 20.056, 19.979, 20.017]]))) + self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB2"].self_time, np.array([[1.0, 1.0, 1.0, 1.0], [1.002, 1.004, 1.0, 1.002]]))) + self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB2"].total_time, np.array([[1.0, 1.0, 1.0, 1.0], [1.002, 1.004, 1.0, 1.002]]))) + self.assertTrue(np.array_equal(loaded_data.data["MAIN_SUB2"].cumul_time, np.array([[7.001, 9.001, 8.001, 5.0], [7.012, 5.008, 8.012, 6.01]]))) if __name__ == '__main__': unittest.main() diff --git a/post-processing/vernier/vernier_data.py b/post-processing/vernier/vernier_data.py index 076ffdc5..78f82c21 100644 --- a/post-processing/vernier/vernier_data.py +++ b/post-processing/vernier/vernier_data.py @@ -12,22 +12,22 @@ from pathlib import Path from typing import Optional - -@dataclass(order=True) class VernierCalliper(): """ Class to hold data for a single Vernier calliper, including arrays for each metric. - """ - total_time: list[float] - time_percent: list[float] - self_time: list[float] - cumul_time: list[float] - n_calls: list[int] + + self_time: np.ndarray + total_time: np.ndarray + time_percent: np.ndarray + cumul_time: np.ndarray + n_calls: np.ndarray + n_ranks: int + n_threads: int name: str - def __init__(self, name: str): + def __init__(self, name: str, n_ranks: int, n_threads: int = 1): """ Initialise the VernierCalliper instance with lists ready to be filled with calliper data. @@ -36,17 +36,19 @@ def __init__(self, name: str): """ self.name = name - self.time_percent = [] - self.cumul_time = [] - self.self_time = [] - self.total_time = [] - self.n_calls = [] + self.ranks = n_ranks + self.threads = n_threads + self.time_percent = np.full((n_ranks, n_threads), np.nan) + self.cumul_time = np.full((n_ranks, n_threads), np.nan) + self.self_time = np.full((n_ranks, n_threads), np.nan) + self.total_time = np.full((n_ranks, n_threads), np.nan) + self.n_calls = np.full((n_ranks, n_threads), np.nan) return def __len__(self): """ - Return None if caliper elements differ in length, + Return None if calliper elements differ in length, otherwise return element lengths. """ result = None @@ -64,19 +66,33 @@ def reduce(self) -> list: """ return [ - self.name.replace('@0', ''), # calliper name - round(np.mean(self.total_time), 5), # mean total time across calls - round(np.mean(self.self_time), 5), # mean self time across calls - round(np.mean(self.cumul_time), 5), # mean cumulative time across calls - self.n_calls[0], # number of calls (should be the same for all entries, so just take the first) - round(np.mean(self.time_percent), 5), # mean percentage of time across calls - round(np.mean(self.total_time) / self.n_calls[0], 5) # mean time per call + self.name, # calliper name + # We use nanmean to avoid NaN's where array shapes are non-conformant + # due to collated data possibly having different numbers of threads + round(np.nanmean(self.total_time), 5), # mean total time across calls + round(np.nanmean(self.self_time), 5), # mean self time across calls + round(np.nanmean(self.cumul_time), 5), # mean cumulative time across calls + int(np.nanmax(self.n_calls)) , # number of calls + round(np.nanmean(self.time_percent), 5), # mean percentage of time across calls + round(np.nanmean(self.total_time / self.n_calls), 5) # mean time per call ] @classmethod def labels(self): return ["Routine", "Total time (s)", "Self (s)", "Cumul time (s)", - "No. calls", "% time", "Time per call (s)"] + "Max no. calls", "% time", "Time per call (s)"] + + def __lt__(self, other): + """Comparison method for sorting callipers by self time.""" + return self.self_time.sum() < other.self_time.sum() + + def __eq__(self, other): + """Equality method for comparing callipers by self time.""" + return self.self_time.sum() == other.self_time.sum() + + def __gt__(self, other): + """Comparison method for sorting callipers by self time.""" + return self.self_time.sum() > other.self_time.sum() class VernierData(): @@ -94,7 +110,7 @@ def __init__(self): """ self.data = {} - def add_calliper(self, calliper_key: str): + def add_calliper(self, calliper_key: str, n_ranks: int, n_threads: int): """ Adds a new calliper to the data structure, with empty arrays for each metric. @@ -104,7 +120,7 @@ def add_calliper(self, calliper_key: str): """ # Create empty data arrays - self.data[calliper_key] = VernierCalliper(calliper_key) + self.data[calliper_key] = VernierCalliper(calliper_key, n_ranks, n_threads) def filter(self, calliper_keys: list[str]): """ @@ -153,13 +169,7 @@ def write_txt_output(self, txt_path: Optional[Path] = None): # sort by self time, descending txt_table = sorted(txt_table, key=lambda x: x[2], reverse=True) - txt_table.insert(0, ["Routine", - "Total time (s)", - "Self (s)", - "Cumul time (s)", - "No. calls", - "% time", - "Time per call (s)"]) + txt_table.insert(0, VernierCalliper.labels()) max_calliper_len = max([len(line[0]) for line in txt_table]) @@ -170,12 +180,12 @@ def write_txt_output(self, txt_path: Optional[Path] = None): out = open(txt_path, 'w') for row in txt_table: - out.write('| {:>{}} | {:>14} | {:>12} | {:>14} | {:>9} | {:>8} | {:>17} |\n'.format(row[0], max_calliper_len, *row[1:])) + out.write('| {:>{}} | {:>14} | {:>12} | {:>14} | {:>13} | {:>8} | {:>17} |\n'.format(row[0], max_calliper_len, *row[1:])) if txt_path is not None: out.close() - def get(self, calliper_key): + def get(self, calliper_key) -> VernierCalliper: """ Return a VernierCalliper of the data for this calliper_key, or None if it does not exist. @@ -183,54 +193,6 @@ def get(self, calliper_key): return self.data.get(calliper_key, None) - def aggregate(self, vernier_data_list=None, internal_consistency=True): - """ - Aggregates a list of VernierData objects into a single VernierData - object by concatenating the data for each calliper across the input - objects. - - :param vernier_data_list: A list of VernierData objects to combine. - :type vernier_datalist: list[:py:class:`vernier.VernierData`] - - :param bool internal_conistency: If set to True (default), callipers - between all items in the - vernier_data_list must be identical. - - :returns: A single VernierData object containing the data from all - VernierData objects in vernier_data_list. - :rtype: :py:class:`vernier.VernierData` - - :raises ValueError: if internal_consistency is set to True and callipers - between items in vernier_data_list are not - identical. - - """ - if vernier_data_list is None: - vernier_data_list = [] - if internal_consistency: - # Check that all input VernierData objects have the same - # set of callipers - calliper_sets = [set(vernier_data.data.keys()) for vernier_data in - vernier_data_list] - if not all(calliper_set == calliper_sets[0] for - calliper_set in calliper_sets): - raise ValueError("Input VernierData objects do not have the " - "same set of callipers, but " - "internal_consistency is set to True.") - - for vernier_data in vernier_data_list: - for calliper in vernier_data.data.keys(): - if not calliper in self.data: - self.add_calliper(calliper) - - self.data[calliper].time_percent.extend(vernier_data.data[calliper].time_percent) - self.data[calliper].cumul_time.extend(vernier_data.data[calliper].cumul_time) - self.data[calliper].self_time.extend(vernier_data.data[calliper].self_time) - self.data[calliper].total_time.extend(vernier_data.data[calliper].total_time) - self.data[calliper].n_calls.extend(vernier_data.data[calliper].n_calls) - - - class VernierDataCollation(): """ Class to hold an collation of VernierData instances. @@ -294,7 +256,6 @@ def internal_consistency(self, new_vernier_data=None): consistent callipers. """ - # notImplemented enforce consistent sizing of members?? needed? callipers = [] # Loop over VernierData objects for _, vdata in self.vernier_data.items(): @@ -339,7 +300,7 @@ def calliper_list(self): break return result - def get(self, calliper_key): + def get(self, calliper_key) -> VernierCalliper: """ Return a VernierCalliper of all the data from all collation members for this calliper_key, or None if it does not exist. @@ -355,12 +316,18 @@ def get(self, calliper_key): if calliper_key not in self.calliper_list(): return None self.internal_consistency() - results = VernierCalliper(calliper_key) + + n_ranks = sum([vdata.data[calliper_key].ranks for _, vdata in self.vernier_data.items()]) + n_threads = max([vdata.data[calliper_key].threads for _, vdata in self.vernier_data.items()]) + results = VernierCalliper(calliper_key, n_ranks, n_threads) + rank_begin_index = 0 for _, vdata in self.vernier_data.items(): - results.total_time += vdata.data[calliper_key].total_time - results.time_percent += vdata.data[calliper_key].time_percent - results.self_time += vdata.data[calliper_key].self_time - results.cumul_time += vdata.data[calliper_key].cumul_time - results.n_calls += vdata.data[calliper_key].n_calls + rank_end_index = rank_begin_index + vdata.data[calliper_key].ranks + results.total_time[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].total_time + results.time_percent[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].time_percent + results.self_time[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].self_time + results.cumul_time[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].cumul_time + results.n_calls[rank_begin_index:rank_end_index][:] = vdata.data[calliper_key].n_calls + rank_begin_index = rank_end_index return results diff --git a/post-processing/vernier/vernier_reader.py b/post-processing/vernier/vernier_reader.py index dc27dff6..d9766ae2 100644 --- a/post-processing/vernier/vernier_reader.py +++ b/post-processing/vernier/vernier_reader.py @@ -32,23 +32,44 @@ def _load_from_file(self) -> VernierData: """ loaded = VernierData() + n_ranks_total = None + rank = 0 # Populate data - contents = self.path.read_text().splitlines() - for line in contents: + file_contents = self.path.read_text() + n_ranks_in_file = file_contents.count("Task") # Number of ranks worth of data in file only countable this way + calliper_ids = [line.split()[-1] for line in file_contents.splitlines() if line.split() and line.split()[0].isdigit()] + + file_data = file_contents.splitlines() + for line in file_data: sline = line.split() if len(sline) > 0: # Line contains data + if "Task" in sline: + if n_ranks_total is None: + n_ranks_total = int(sline[3]) + if not n_ranks_in_file in [1, n_ranks_total]: + raise ValueError(f"Input data not consistent with either collated or multi-file vernier output.") + rank = int(sline[-1]) # Extract rank number from the data line if sline[0].isdigit(): # Calliper lines start with a digit - calliper = sline[-1] + if n_ranks_in_file == 1: + rank_index = 0 + else: + rank_index = rank + + calliper, thread = sline[-1].split('@') + thread_id = int(thread) + + n_threads = int(len([cal_id for cal_id in calliper_ids if f"{calliper}@" in cal_id]) / n_ranks_in_file) + if not calliper in loaded.data: - loaded.add_calliper(calliper) + loaded.add_calliper(calliper, n_ranks_in_file, n_threads) - loaded.data[calliper].time_percent.append(float(sline[1])) - loaded.data[calliper].cumul_time.append(float(sline[2])) - loaded.data[calliper].self_time.append(float(sline[3])) - loaded.data[calliper].total_time.append(float(sline[4])) - loaded.data[calliper].n_calls.append(int(sline[5])) + loaded.data[calliper].time_percent[rank_index, thread_id] = float(sline[1]) + loaded.data[calliper].cumul_time[rank_index, thread_id] = float(sline[2]) + loaded.data[calliper].self_time[rank_index, thread_id] = float(sline[3]) + loaded.data[calliper].total_time[rank_index, thread_id] = float(sline[4]) + loaded.data[calliper].n_calls[rank_index, thread_id] = int(sline[5]) if not loaded.data: raise ValueError(f"No calliper data found in file '{self.path}'.") @@ -73,7 +94,29 @@ def _load_from_directory(self) -> VernierData: ) result = VernierData() - result.aggregate(vernier_datasets) + + # Check that all input VernierData objects have the same + # set of callipers + calliper_sets = [set(vernier_data.data.keys()) for vernier_data in + vernier_datasets] + if not all(calliper_set == calliper_sets[0] for + calliper_set in calliper_sets): + raise ValueError("Input VernierData objects do not have the " + "same set of callipers, but " + "internal_consistency is set to True.") + + for i, vernier_data in enumerate(vernier_datasets): + for calliper in vernier_data.data.keys(): + if not calliper in result.data: + _, n_threads = vernier_data.data[calliper].self_time.shape + result.add_calliper(calliper, len(vernier_datasets), n_threads) + + result.data[calliper].time_percent[i] = vernier_data.data[calliper].time_percent[0] + result.data[calliper].cumul_time[i] = vernier_data.data[calliper].cumul_time[0] + result.data[calliper].self_time[i] = vernier_data.data[calliper].self_time[0] + result.data[calliper].total_time[i] = vernier_data.data[calliper].total_time[0] + result.data[calliper].n_calls[i] = vernier_data.data[calliper].n_calls[0] + return result def load(self) -> VernierData: