diff --git a/post-processing/tests/test_vernier_calliper.py b/post-processing/tests/test_vernier_caliper.py similarity index 100% rename from post-processing/tests/test_vernier_calliper.py rename to post-processing/tests/test_vernier_caliper.py diff --git a/post-processing/tests/test_vernier_data.py b/post-processing/tests/test_vernier_data.py index d1c0dd66..c4db62bf 100644 --- a/post-processing/tests/test_vernier_data.py +++ b/post-processing/tests/test_vernier_data.py @@ -9,7 +9,7 @@ import unittest import sys sys.path.append(str(Path(__file__).parent.parent)) -from vernier.vernier_data import VernierData, aggregate +from vernier import VernierData, VernierDataCollation class TestVernierData(unittest.TestCase): """ @@ -98,7 +98,8 @@ def test_aggregate(self): data2.data["calliper_a"].total_time = [28.0, 38.0] data2.data["calliper_a"].n_calls = [3, 3] - aggregated = aggregate([data1, data2]) + aggregated = VernierData() + aggregated.aggregate([data1, data2]) self.assertIn("calliper_a", aggregated.data) self.assertEqual(aggregated.data["calliper_a"].time_percent, [10.0, 20.0, 15.0, 25.0]) self.assertEqual(aggregated.data["calliper_a"].cumul_time, [30.0, 40.0, 35.0, 45.0]) @@ -124,7 +125,8 @@ def test_aggregate_inconsistent(self): data2.data["calliper_b"].n_calls = [3, 3] with self.assertRaises(ValueError): - aggregate([data1, data2]) + aggregated = VernierData() + aggregated.aggregate([data1, data2]) def test_aggregate_inconsistent_ok(self): data1 = VernierData() @@ -133,9 +135,83 @@ def test_aggregate_inconsistent_ok(self): data2 = VernierData() data2.add_calliper("calliper_b") - agg_data = aggregate([data1, data2], internal_consistency=False) - self.assertIn("calliper_a", agg_data.data) - self.assertIn("calliper_b", agg_data.data) + aggregated = VernierData() + aggregated.aggregate([data1, data2], internal_consistency=False) + self.assertIn("calliper_a", aggregated.data) + self.assertIn("calliper_b", aggregated.data) + + def test_get(self): + data1 = 
VernierData() + data1.add_calliper("calliper_a") + data1.data["calliper_a"].time_percent = [10.0, 20.0] + data1.data["calliper_a"].cumul_time = [30.0, 40.0] + data1.data["calliper_a"].self_time = [5.0, 15.0] + data1.data["calliper_a"].total_time = [25.0, 35.0] + data1.data["calliper_a"].n_calls = [2, 2] + self.assertEqual(len(data1.get("calliper_a")), 2) + + +class TestVernierCollation(unittest.TestCase): + """ + Tests for the VernierData Collation class. + """ + def _add_data(self): + self.collation = VernierDataCollation() + data1 = VernierData() + data1.add_calliper("calliper_a") + data1.data["calliper_a"].time_percent = [10.0, 20.0] + data1.data["calliper_a"].cumul_time = [30.0, 40.0] + data1.data["calliper_a"].self_time = [5.0, 15.0] + data1.data["calliper_a"].total_time = [25.0, 35.0] + data1.data["calliper_a"].n_calls = [2, 2] + + data2 = VernierData() + data2.add_calliper("calliper_a") + data2.data["calliper_a"].time_percent = [15.0, 25.0] + data2.data["calliper_a"].cumul_time = [35.0, 45.0] + data2.data["calliper_a"].self_time = [6.0, 16.0] + data2.data["calliper_a"].total_time = [28.0, 38.0] + data2.data["calliper_a"].n_calls = [3, 3] + + self.collation.add_data('test1', data1) + self.collation.add_data('test2', data2) + + def test_add_data(self): + self._add_data() + self.assertEqual(len(self.collation), 2) + + def test_remove_data(self): + self._add_data() + self.collation.remove_data('test1') + self.assertEqual(len(self.collation), 1) + + def test_get(self): + self._add_data() + calliper_a = self.collation.get("calliper_a") + self.assertEqual(len(calliper_a), 4) + + def test_internal_consistency(self): + self._add_data() + data_inc = VernierData() + data_inc.add_calliper("calliper_a") + data_inc.data["calliper_a"].time_percent = [10.0, 20.0] + data_inc.data["calliper_a"].cumul_time = [30.0, 40.0] + data_inc.data["calliper_a"].self_time = [5.0, 15.0] + data_inc.data["calliper_a"].total_time = [25.0, 35.0] + data_inc.data["calliper_a"].n_calls = [2, 2] + 
def __len__(self):
    """
    Return the number of samples recorded for this calliper.

    The calliper stores five parallel lists (time_percent, cumul_time,
    self_time, total_time and n_calls); when they all have the same
    length, that common length is returned.

    Raises:
        ValueError: if the five lists do not all have the same length.
            ``__len__`` must return a non-negative integer, so returning
            None here would only surface later as an opaque TypeError
            from ``len()`` or from truth-testing the calliper.
    """
    lengths = {
        "time_percent": len(self.time_percent),
        "cumul_time": len(self.cumul_time),
        "self_time": len(self.self_time),
        "total_time": len(self.total_time),
        "n_calls": len(self.n_calls),
    }
    distinct = set(lengths.values())
    if len(distinct) != 1:
        raise ValueError(
            f"calliper elements differ in length: {lengths}"
        )
    return distinct.pop()
def get(self, calliper_key):
    """
    Look up the VernierCalliper stored under ``calliper_key``.

    Returns the stored calliper object, or None when this dataset holds
    no calliper under that key.
    """
    # dict.get already yields None for a missing key.
    calliper = self.data.get(calliper_key)
    return calliper
class VernierDataCollation():
    """
    Class to hold a collation of VernierData instances, keyed by label.

    Members are required to be consistent in the terms enforced by the
    internal_consistency method: every member holds the same set of
    calliper names.
    """

    def __init__(self):
        # Maps a caller-chosen label to the VernierData stored under it.
        self.vernier_data = {}

    def __len__(self):
        """Return the number of VernierData members in this collation."""
        return len(self.vernier_data)

    def add_data(self, label, vernier_data):
        """
        Add ``vernier_data`` to the collation under ``label``.

        Raises:
            ValueError: if ``label`` is already in use, or if the callipers
                of ``vernier_data`` differ from those of existing members.
            TypeError: if ``vernier_data`` is not a VernierData instance.
        """
        if label in self.vernier_data:
            raise ValueError(f'The label {label} already exists in this '
                             'collation. Please use a different label or '
                             'remove the existing entry.')
        if not isinstance(vernier_data, VernierData):
            raise TypeError('The provided vernier_data is not a VernierData '
                            'object.')
        # Validate before storing so a rejected member leaves the
        # collation unchanged.
        self.internal_consistency(vernier_data)
        self.vernier_data[label] = vernier_data

    def remove_data(self, label):
        """
        Remove the member stored under ``label``.

        Raises:
            ValueError: if ``label`` is not present in this collation.
        """
        if label not in self.vernier_data:
            raise ValueError(f'The label {label} does not exist in this '
                             'collation.')
        del self.vernier_data[label]

    def internal_consistency(self, new_vernier_data=None):
        """
        Enforce internal consistency, with the same callipers for all members.

        When ``new_vernier_data`` is given, it is also checked against the
        existing members (any calliper set is accepted while the collation
        is empty).

        Raises:
            ValueError: if two members, or ``new_vernier_data`` and the
                members, do not share the same calliper names.
            TypeError: if ``new_vernier_data`` is given but is not a
                VernierData instance.
        """
        # NOTE: consistent sizing of members is not enforced here.
        # None (rather than an empty list) marks "no reference yet", so a
        # member that legitimately has no callipers still acts as the
        # reference instead of being silently replaced by the next member.
        reference = None
        for vdata in self.vernier_data.values():
            member_callipers = sorted(vdata.data)
            if reference is None:
                reference = member_callipers
            elif member_callipers != reference:
                raise ValueError('inconsistent callipers in contents')

        if new_vernier_data is None:
            return
        if not isinstance(new_vernier_data, VernierData):
            raise TypeError('The provided vernier_data is not a '
                            'VernierData object.')
        if (reference is not None and
                sorted(new_vernier_data.data) != reference):
            raise ValueError('inconsistent callipers in new_vernier_data')

    def calliper_list(self):
        """
        Return the sorted list of callipers in this collation.

        An empty collation yields an empty list. Raises ValueError if the
        members are not internally consistent.
        """
        self.internal_consistency()
        # Members are consistent, so the first one speaks for all of them.
        first_member = next(iter(self.vernier_data.values()), None)
        if first_member is None:
            return []
        return sorted(first_member.data)

    def get(self, calliper_key):
        """
        Return a VernierCalliper of all the data from all collation members
        for this calliper_key, or None if it does not exist.

        Member data is concatenated in the order the members were added.
        """
        # calliper_list() already runs the consistency check, so it is not
        # repeated here.
        if calliper_key not in self.calliper_list():
            return None
        results = VernierCalliper(calliper_key)
        for vdata in self.vernier_data.values():
            member = vdata.data[calliper_key]
            results.total_time.extend(member.total_time)
            results.time_percent.extend(member.time_percent)
            results.self_time.extend(member.self_time)
            results.cumul_time.extend(member.cumul_time)
            results.n_calls.extend(member.n_calls)

        return results