From 934902a44b6b8b372cfdae3c0161cfc49ab6f0e4 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 11 Mar 2024 09:45:00 +0100 Subject: [PATCH 1/6] Add partitioners property to FederatedDataset --- datasets/flwr_datasets/federated_dataset.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datasets/flwr_datasets/federated_dataset.py b/datasets/flwr_datasets/federated_dataset.py index c40f8cc34857..2104a36167d6 100644 --- a/datasets/flwr_datasets/federated_dataset.py +++ b/datasets/flwr_datasets/federated_dataset.py @@ -161,6 +161,11 @@ def load_full(self, split: str) -> Dataset: self._check_if_split_present(split) return self._dataset[split] + @property + def partitioners(self) -> Dict[str, Partitioner]: + """Split to associated partitioners dictionary.""" + return self._partitioners + def _check_if_split_present(self, split: str) -> None: """Check if the split (for partitioning or full return) is in the dataset.""" if self._dataset is None: From 3c9885d737dd8a41eb2d5052a34ad7e55c830247 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Tue, 12 Mar 2024 23:12:10 +0000 Subject: [PATCH 2/6] Trigger dataset download by partitioners property --- datasets/flwr_datasets/federated_dataset.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/federated_dataset.py b/datasets/flwr_datasets/federated_dataset.py index 2104a36167d6..d61cd5efd88b 100644 --- a/datasets/flwr_datasets/federated_dataset.py +++ b/datasets/flwr_datasets/federated_dataset.py @@ -163,7 +163,15 @@ def load_full(self, split: str) -> Dataset: @property def partitioners(self) -> Dict[str, Partitioner]: - """Split to associated partitioners dictionary.""" + """Dictionary mapping split to associated partitioners.""" + if not self._dataset_prepared: + self._prepare_dataset() + if self._dataset is None: + raise ValueError("Dataset is not loaded yet.") + partitioners_keys = list(self._partitioners.keys()) + for split in partitioners_keys: + self._check_if_split_present(split) + self._assign_dataset_to_partitioner(split) return self._partitioners def _check_if_split_present(self, split: str) -> None: From 07a825ea35fe2e03d55c231a86e6dccbe3f1609b Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Wed, 13 Mar 2024 21:11:37 +0000 Subject: [PATCH 3/6] Update the partitioners property docs --- datasets/flwr_datasets/federated_dataset.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/federated_dataset.py b/datasets/flwr_datasets/federated_dataset.py index d61cd5efd88b..585d6f06e77f 100644 --- a/datasets/flwr_datasets/federated_dataset.py +++ b/datasets/flwr_datasets/federated_dataset.py @@ -163,7 +163,13 @@ def load_full(self, split: str) -> Dataset: @property def partitioners(self) -> Dict[str, Partitioner]: - """Dictionary mapping split to associated partitioners.""" + """Dictionary mapping split to associated partitioners. + + The returned partitioners have the splits of the dataset assigned to them. + """ + # This function trigger the dataset download (laizy download) and checks + # the partitioner specification correctness (which can also happen lazily only + # after the dataset download). if not self._dataset_prepared: self._prepare_dataset() if self._dataset is None: From 745f22fda37ce4141694cc8cbd84531978365967 Mon Sep 17 00:00:00 2001 From: Javier Date: Wed, 13 Mar 2024 21:26:14 +0000 Subject: [PATCH 4/6] Apply suggestions from code review --- datasets/flwr_datasets/federated_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/federated_dataset.py b/datasets/flwr_datasets/federated_dataset.py index 585d6f06e77f..2245200e6c12 100644 --- a/datasets/flwr_datasets/federated_dataset.py +++ b/datasets/flwr_datasets/federated_dataset.py @@ -167,7 +167,7 @@ def partitioners(self) -> Dict[str, Partitioner]: The returned partitioners have the splits of the dataset assigned to them. """ - # This function trigger the dataset download (laizy download) and checks + # This function triggers the dataset download (laizy download) and checks # the partitioner specification correctness (which can also happen lazily only # after the dataset download). if not self._dataset_prepared: From 2d280424ae95deb678794b3131ca219fb65949e2 Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Wed, 13 Mar 2024 21:33:28 +0000 Subject: [PATCH 5/6] Update datasets/flwr_datasets/federated_dataset.py Co-authored-by: Javier --- datasets/flwr_datasets/federated_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/federated_dataset.py b/datasets/flwr_datasets/federated_dataset.py index 2245200e6c12..11179ad80ede 100644 --- a/datasets/flwr_datasets/federated_dataset.py +++ b/datasets/flwr_datasets/federated_dataset.py @@ -163,7 +163,7 @@ def load_full(self, split: str) -> Dataset: @property def partitioners(self) -> Dict[str, Partitioner]: - """Dictionary mapping split to associated partitioners. + """Dictionary mapping each split to its associated partitioner. The returned partitioners have the splits of the dataset assigned to them. """ From 5a394815e9dafd5d79178650e887b4429345a746 Mon Sep 17 00:00:00 2001 From: "Daniel J. Beutel" Date: Thu, 14 Mar 2024 00:02:32 +0100 Subject: [PATCH 6/6] Update datasets/flwr_datasets/federated_dataset.py --- datasets/flwr_datasets/federated_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/federated_dataset.py b/datasets/flwr_datasets/federated_dataset.py index f5698e144979..ed3d03fd1442 100644 --- a/datasets/flwr_datasets/federated_dataset.py +++ b/datasets/flwr_datasets/federated_dataset.py @@ -219,7 +219,7 @@ def partitioners(self) -> Dict[str, Partitioner]: The returned partitioners have the splits of the dataset assigned to them. """ - # This function triggers the dataset download (laizy download) and checks + # This function triggers the dataset download (lazy download) and checks # the partitioner specification correctness (which can also happen lazily only # after the dataset download). if not self._dataset_prepared: