From a21782f4781ff02e31bd325c3bdc39a85fd68a36 Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Tue, 26 Nov 2024 08:34:43 +0100 Subject: [PATCH] docs(datasets) Fix formatting of the DistributionPartitioner docs (#4582) --- .../partitioner/distribution_partitioner.py | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/datasets/flwr_datasets/partitioner/distribution_partitioner.py b/datasets/flwr_datasets/partitioner/distribution_partitioner.py index 86be62b36070..e4182f587cad 100644 --- a/datasets/flwr_datasets/partitioner/distribution_partitioner.py +++ b/datasets/flwr_datasets/partitioner/distribution_partitioner.py @@ -36,21 +36,22 @@ class DistributionPartitioner(Partitioner): # pylint: disable=R0902 in a deterministic pathological manner. The 1st dimension is the number of unique labels and the 2nd-dimension is the number of buckets into which the samples associated with each label will be divided. That is, given a distribution array of - shape, - `num_unique_labels_per_partition` x `num_partitions` - ( `num_unique_labels`, ---------------------------------------------------- ), - `num_unique_labels` - the label_id at the i'th row is assigned to the partition_id based on the following - approach. - - First, for an i'th row, generate a list of `id`s according to the formula: - id = alpha + beta - where, - alpha = (i - num_unique_labels_per_partition + 1) \ - + (j % num_unique_labels_per_partition), - alpha = alpha + (alpha >= 0 ? 0 : num_unique_labels), - beta = num_unique_labels * (j // num_unique_labels_per_partition) - and j in {0, 1, 2, ..., `num_columns`}. Then, sort the list of `id`s in ascending + shape,:: + + `num_unique_labels_per_partition` x `num_partitions` + ( `num_unique_labels`, ---------------------------------------------------- ), + `num_unique_labels` + the label_id at the i'th row is assigned to the partition_id based on the + following approach. 
+ + First, for an i'th row, generate a list of `id`s according to the formula: + id = alpha + beta + where, + alpha = (i - num_unique_labels_per_partition + 1) + + (j % num_unique_labels_per_partition), + alpha = alpha + (alpha >= 0 ? 0 : num_unique_labels), + beta = num_unique_labels * (j // num_unique_labels_per_partition) + and j in {0, 1, 2, ..., `num_columns` - 1}. Then, sort the list of `id` s in ascending order. The j'th index in this sorted list corresponds to the partition_id that the i'th unique label (and the underlying distribution array value) will be assigned to. So, for a dataset with 10 unique labels and a configuration with 20 partitions and