# --- prtpy/binners.py: method added to the binner class (the class header is outside this view) ---

def remove_item_from_bin(self, bins: BinsArray, bin_index: int, item_index: int) -> BinsArray:
    """
    Remove a single item from a bin, and subtract its value from the sum of that bin.

    `bins` is unpacked as a pair (sums, lists): `sums[bin_index]` is the sum of the bin,
    and `lists[bin_index]` is the sequence of items in the bin.
    The item removed is the one at position `item_index` of `lists[bin_index]`.
    The structure is modified in place.

    Return the bins after the removal.
    """
    sums, lists = bins
    item = lists[bin_index].pop(item_index)
    # Bin sums are measured in item *values* (callers compute them with `valueof(item)`),
    # so the value of the item - not the raw item - must be subtracted.
    sums[bin_index] -= self.valueof(item)
    return bins


# --- prtpy/objectives.py ---

class MinimizeTheDistanceFromAvg(Objective):
    """
    Objective: minimize the total amount by which the bin sums exceed the average bin sum.
    A bin whose sum is not larger than the average contributes nothing.
    """

    def value_to_minimize(self, sums: list, are_sums_in_ascending_order: bool = False) -> float:
        """
        Return the sum of (s - avg) over all bin sums s that are larger than avg,
        where avg is the average of `sums`.
        The argument `are_sums_in_ascending_order` is not used here.

        >>> MinimizeDistAvg.value_to_minimize([10,20,30,40,50])
        30.0
        """
        avg = sum(sums) / len(sums)
        diff_from_avg = 0
        # Accumulate one bin at a time, in the given order.
        for s in sums:
            if s > avg:
                diff_from_avg = diff_from_avg + (s - avg)
        return diff_from_avg

    def __str__(self) -> str:
        return "minimize-the-distance-from-avg"

    def lower_bound(self, sums: list, sum_of_remaining_items: float, are_sums_in_ascending_order: bool = False) -> float:
        """
        Compute an optimistic value of the objective, given the current bin sums
        and the total value of the items that are not allocated yet.

        First we calculate the final avg, including the remaining items.
        Then we go over the bins in the given order:
        * A bin that has not reached the avg yet takes from the remaining sum
          the integer part of its distance from the avg (or all that is left, if it is less).
          Such a bin does not contribute to the result, since only bins above the avg are counted.
        * A bin that is already above the avg contributes its distance from the avg.
        Whatever is left of the remaining sum at the end is taken modulo the number of bins,
        divided by the number of bins, and added to the result.
        NOTE(review): this last term looks like it assumes integer-valued items - confirm.

        The argument `are_sums_in_ascending_order` is not used here.

        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=5)
        28.0
        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=20)
        22.0
        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=45)
        12.0

        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=200)
        0.0
        >>> MinimizeDistAvg.lower_bound([0,0,0,0,0], sum_of_remaining_items=54)
        0.8
        """
        numbins = len(sums)
        remaining = sum_of_remaining_items
        avg = (sum(sums) + remaining) / numbins
        diff_from_avg = 0
        for s in sums:
            if s < avg and remaining > 0:
                # Fill the bin up towards the avg, using whole units only.
                remaining = remaining - min(remaining, int(avg - s))
            if s > avg:
                diff_from_avg = diff_from_avg + (s - avg)
        return diff_from_avg + ((remaining % numbins) / numbins)


MinimizeDistAvg = MinimizeTheDistanceFromAvg()
Programmer: Erel Segal-Halevi + Eitan Lichtman added Minimize Distance from Avg Objective """ - +import math from typing import List, Tuple, Callable, Iterator, Any import numpy as np import logging, time + from prtpy import objectives as obj, Binner, BinsArray logger = logging.getLogger(__name__) - def anytime( - binner: Binner, numbins: int, items: List[any], + binner: Binner, numbins: int, items: List[any], relative_value: List[any] = None, objective: obj.Objective = obj.MinimizeDifference, - use_lower_bound: bool = True, # Prune branches whose lower bound (= optimistic value) is at least as large as the current minimum. - use_fast_lower_bound: bool = True, # A faster lower bound, that does not create the branch at all. Useful for min-max and max-min objectives. - use_heuristic_3: bool = False, # An improved stopping condition, applicable for min-max only. Not very useful in experiments. - use_set_of_seen_states: bool = True, + use_lower_bound: bool = True, + # Prune branches whose lower bound (= optimistic value) is at least as large as the current minimum. + use_fast_lower_bound: bool = True, + # A faster lower bound, that does not create the branch at all. Useful for min-max max-min and min-dist-avg objectives. + use_heuristic_3: bool = False, + # An improved stopping condition, applicable for min-max only. Not very useful in experiments. + use_set_of_seen_states: bool = True, time_limit: float = np.inf, ) -> Iterator: """ @@ -41,6 +45,10 @@ def anytime( Bin #0: [6, 5, 4], sum=15.0 Bin #1: [8, 7], sum=15.0 + >>> printbins(anytime(BinnerKeepingContents(), 2, [4,5,6,7,8], [0.3,0.7], objective=obj.MinimizeDistAvg)) + Bin #0: [5, 4], sum=9.0 + Bin #1: [8, 7, 6], sum=21.0 + The following examples are based on: Walter (2013), 'Comparing the minimum completion times of two longest-first scheduling-heuristics'. 
>>> walter_numbers = [46, 39, 27, 26, 16, 13, 10] @@ -56,6 +64,56 @@ def anytime( Bin #0: [46, 10], sum=56.0 Bin #1: [27, 16, 13], sum=56.0 Bin #2: [39, 26], sum=65.0 + >>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MinimizeDistAvg)) + Bin #0: [39, 16], sum=55.0 + Bin #1: [46, 13], sum=59.0 + Bin #2: [27, 26, 10], sum=63.0 + >>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers,[0.2,0.4,0.4], objective=obj.MinimizeDistAvg)) + Bin #0: [27, 10], sum=37.0 + Bin #1: [39, 16, 13], sum=68.0 + Bin #2: [46, 26], sum=72.0 + + >>> printbins(anytime(BinnerKeepingContents(), 5, [460000000, 390000000, 270000000, 260000000, 160000000, 130000000, 100000000],[0.2,0.4,0.1,0.15,0.15], objective=obj.MinimizeDistAvg)) + Bin #0: [390000000], sum=390000000.0 + Bin #1: [460000000, 130000000, 100000000], sum=690000000.0 + Bin #2: [160000000], sum=160000000.0 + Bin #3: [260000000], sum=260000000.0 + Bin #4: [270000000], sum=270000000.0 + + + >>> printbins(anytime(BinnerKeepingContents(), 10, [115268834, 22638149, 35260669, 68111031, 13376625, 20835125, 179398684, 69888000, 94462800, 5100340, 27184906, 305371, 272847, 545681, 1680746, 763835, 763835], [0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1], objective=obj.MinimizeDistAvg)) + Bin #0: [13376625, 5100340, 1680746, 763835, 763835, 545681, 305371, 272847], sum=22809280.0 + Bin #1: [20835125], sum=20835125.0 + Bin #2: [22638149], sum=22638149.0 + Bin #3: [27184906], sum=27184906.0 + Bin #4: [35260669], sum=35260669.0 + Bin #5: [68111031], sum=68111031.0 + Bin #6: [69888000], sum=69888000.0 + Bin #7: [94462800], sum=94462800.0 + Bin #8: [115268834], sum=115268834.0 + Bin #9: [179398684], sum=179398684.0 + + + >>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers,[0.1,0.9,0], objective=obj.MinimizeDistAvg)) + Bin #0: [16], sum=16.0 + Bin #1: [46, 39, 27, 26, 13, 10], sum=161.0 + Bin #2: [], sum=0.0 + + + >>> printbins(anytime(BinnerKeepingContents(), 5, [2,2,5,5,5,5,9], 
objective=obj.MinimizeDistAvg)) + Bin #0: [5], sum=5.0 + Bin #1: [5], sum=5.0 + Bin #2: [5, 2], sum=7.0 + Bin #3: [5, 2], sum=7.0 + Bin #4: [9], sum=9.0 + >>> printbins(anytime(BinnerKeepingContents(), 3, [1,1,1,1], objective=obj.MinimizeDistAvg)) + Bin #0: [1], sum=1.0 + Bin #1: [1], sum=1.0 + Bin #2: [1, 1], sum=2.0 + >>> printbins(anytime(BinnerKeepingContents(), 3, [1,1,1,1,1], objective=obj.MinimizeDistAvg)) + Bin #0: [1], sum=1.0 + Bin #1: [1, 1], sum=2.0 + Bin #2: [1, 1], sum=2.0 Compare results with and without the lower bound: >>> random_numbers = np.random.randint(1, 2**48-1, 10, dtype=np.int64) @@ -95,24 +153,34 @@ def anytime( end_time = start_time + time_limit sorted_items = sorted(items, key=binner.valueof, reverse=True) - sums_of_remaining_items = [sum(map(binner.valueof, sorted_items[i:])) for i in range(numitems)] + [0] # For Heuristic 3 + sums_of_remaining_items = [sum(map(binner.valueof, sorted_items[i:])) for i in range(numitems)] + [ + 0] # For Heuristic 3 + from prtpy import BinnerKeepingContents, BinnerKeepingSums, printbins best_bins, best_objective_value = None, np.inf - global_lower_bound = objective.lower_bound(np.zeros(numbins), sums_of_remaining_items[0], are_sums_in_ascending_order=True) - logger.info("\nComplete Greedy %s Partitioning of %d items into %d parts. Lower bound: %s", objective, numitems, numbins, global_lower_bound) + global_lower_bound = objective.lower_bound(np.zeros(numbins), sums_of_remaining_items[0], + are_sums_in_ascending_order=True) + + logger.info("\nComplete Greedy %s Partitioning of %d items into %d parts. Lower bound: %s", objective, numitems, + numbins, global_lower_bound) # Create a stack whose elements are a partition and the current depth. # Initially, it contains a single tuple: an empty partition with depth 0. 
- first_bins = binner.new_bins(numbins) + # If the input has relative values for each bin - + # we add a sum to each bin in order to equal them out to the bin with the highest relative value + # (at the end of the algorithm we will remove these sums). + first_bins = binner.new_bins(numbins) + if (relative_value): + for i in range(numbins): + binner.add_item_to_bin(first_bins, (max(relative_value) * sum(items) - relative_value[i] * sum(items)), i) first_vertex = (first_bins, 0) stack: List[Tuple[BinsArray, int]] = [first_vertex] if use_set_of_seen_states: seen_states = set(tuple(binner.sums(first_bins))) - # For logging and profiling: - complete_partitions_checked = 0 - intermediate_partitions_checked = 1 + complete_partitions_checked = 0 + intermediate_partitions_checked = 1 times_fast_lower_bound_activated = 0 times_lower_bound_activated = 0 @@ -134,27 +202,25 @@ def anytime( if new_objective_value < best_objective_value: best_bins, best_objective_value = current_bins, new_objective_value logger.info(" Found a better solution: %s, with value %s", current_bins, best_objective_value) - if new_objective_value<=global_lower_bound: + if new_objective_value <= global_lower_bound: logger.info(" Solution matches global lower bound - stopping") break continue - # Heuristic 3: "If the sum of the remaining unassigned integers plus the smallest current subset sum is <= the largest subset sum, all remaining integers are assigned to the subset with the smallest sum, terminating that branch of the tree." # Note that this heuristic is valid only for the objective "minimize largest sum"! 
- if use_heuristic_3 and objective==obj.MinimizeLargestSum: + if use_heuristic_3 and objective == obj.MinimizeLargestSum: if sums_of_remaining_items[depth] + current_sums[0] <= current_sums[-1]: new_bins = binner.copy_bins(current_bins) - for i in range(depth,numitems): + for i in range(depth, numitems): binner.add_item_to_bin(new_bins, sorted_items[i], 0) - binner.sort_by_ascending_sum(new_bins) + binner.sort_by_ascending_sum(new_bins) new_depth = numitems stack.append((new_bins, new_depth)) logger.debug(" Heuristic 3 activated") - times_heuristic_3_activated+=1 + times_heuristic_3_activated += 1 continue - next_item = sorted_items[depth] - sum_of_remaining_items = sums_of_remaining_items[depth+1] + sum_of_remaining_items = sums_of_remaining_items[depth + 1] previous_bin_sum = None @@ -162,27 +228,40 @@ def anytime( # But, since we use a stack, we have to insert it to the bin with the *largest* sum first, # so that it is pushed deeper into the stack. # Therefore, we proceed in reverse, by *descending* order of sum. - for bin_index in reversed(range(numbins)): + for bin_index in reversed(range(numbins)): # Heuristic 1: "If there are two subsets with the same sum, the current number is assigned to only one." current_bin_sum = current_sums[bin_index] if current_bin_sum == previous_bin_sum: - continue + continue previous_bin_sum = current_bin_sum # Fast-lower-bound heuristic - before creating the new vertex. - # Currently implemented only for two objectives: min-max and max-min. + # Currently implemented only for two objectives: min-max, max-min and min-dist-avg if use_fast_lower_bound: - if objective==obj.MinimizeLargestSum: + if objective == obj.MinimizeLargestSum: # "If an assignment to a subset creates a subset sum that equals or exceeds the largest subset sum in the best complete solution found so far, that branch is pruned from the tree." 
fast_lower_bound = max(current_bin_sum + binner.valueof(next_item), current_sums[-1]) - elif objective==obj.MaximizeSmallestSum: + elif objective == obj.MaximizeSmallestSum: # An adaptation of the above heuristic to maximizing the smallest sum. - if bin_index==0: - new_smallest_sum = min(current_sums[0]+binner.valueof(next_item), current_sums[1]) + if bin_index == 0: + new_smallest_sum = min(current_sums[0] + binner.valueof(next_item), current_sums[1]) else: new_smallest_sum = current_sums[0] - fast_lower_bound = -(new_smallest_sum+sum_of_remaining_items) + fast_lower_bound = -(new_smallest_sum + sum_of_remaining_items) + elif objective == obj.MinimizeDistAvg: + if relative_value: + fast_lower_bound = 0 + for i in range (numbins): + # For each bin: we take off the sum that we added in the beginning of the algorithm (max(relative_value) * sum(items) - relative_value[i] * sum(items)) + # Then we check if the difference between the bin's sum and the relative AVG for bin i: (sum(items)*relative_value[i]) + # is positive and contributes to our final difference or negative and we will not add anything to our difference. + fast_lower_bound = fast_lower_bound + max((current_sums[i]-(max(relative_value) * sum(items) - relative_value[i] * sum(items)))-sum(items)*relative_value[i],0) + else: + fast_lower_bound = 0 + avg = sum(items) / numbins + for i in range (numbins): + fast_lower_bound = fast_lower_bound + max(current_sums[i]-avg,0) else: fast_lower_bound = -np.inf if fast_lower_bound >= best_objective_value: @@ -190,52 +269,66 @@ def anytime( continue new_bins = binner.add_item_to_bin(binner.copy_bins(current_bins), next_item, bin_index) - binner.sort_by_ascending_sum(new_bins) + if not relative_value: + binner.sort_by_ascending_sum(new_bins) new_sums = tuple(binner.sums(new_bins)) # Lower-bound heuristic. 
if use_lower_bound: - lower_bound = objective.lower_bound(new_sums, sum_of_remaining_items, are_sums_in_ascending_order=True) + lower_bound = objective.lower_bound(new_sums, sum_of_remaining_items, are_sums_in_ascending_order=False) if lower_bound >= best_objective_value: logger.debug(" Lower bound %f too large", lower_bound) times_lower_bound_activated += 1 continue - if use_set_of_seen_states: + if use_set_of_seen_states: if new_sums in seen_states: logger.debug(" State %s already seen", new_sums) times_seen_state_skipped += 1 continue - seen_states.add(new_sums) # should be after if use_lower_bound + seen_states.add(new_sums) # should be after if use_lower_bound new_vertex = (new_bins, depth + 1) stack.append(new_vertex) intermediate_partitions_checked += 1 - logger.info("Checked %d out of %d complete partitions, and %d intermediate partitions.", complete_partitions_checked, numbins**numitems, intermediate_partitions_checked) - logger.info(" Heuristics: fast lower bound = %d, lower bound = %d, seen state = %d, heuristic 3 = %d.", times_fast_lower_bound_activated, times_lower_bound_activated, times_seen_state_skipped, times_heuristic_3_activated) + logger.info("Checked %d out of %d complete partitions, and %d intermediate partitions.", + complete_partitions_checked, numbins ** numitems, intermediate_partitions_checked) + logger.info(" Heuristics: fast lower bound = %d, lower bound = %d, seen state = %d, heuristic 3 = %d.", + times_fast_lower_bound_activated, times_lower_bound_activated, times_seen_state_skipped, + times_heuristic_3_activated) + + if (relative_value): + # For each bin we remove the value that we added in the beginning of the algorithm. 
+ for i in range(numbins): + binner.remove_item_from_bin(best_bins, i, 0) + + for i in range(numbins): + binner.sums(best_bins)[i] = math.floor(binner.sums(best_bins)[i]) return best_bins if __name__ == "__main__": import doctest, sys + (failures, tests) = doctest.testmod(report=True, optionflags=doctest.FAIL_FAST) print("{} failures, {} tests".format(failures, tests)) - if failures>0: + if failures > 0: sys.exit() - + # DEMO logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler()) from prtpy import BinnerKeepingContents, BinnerKeepingSums - anytime(BinnerKeepingContents(), 2, [4,5,6,7,8], objective=obj.MinimizeLargestSum) + + anytime(BinnerKeepingContents(), 2, [4, 5, 6, 7, 8], objective=obj.MinimizeLargestSum) walter_numbers = [46, 39, 27, 26, 16, 13, 10] anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MaximizeSmallestSum) anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MinimizeLargestSum) - random_numbers = np.random.randint(1, 2**16-1, 15, dtype=np.int64) + random_numbers = np.random.randint(1, 2 ** 16 - 1, 15, dtype=np.int64) anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MaximizeSmallestSum) anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MinimizeLargestSum) anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MinimizeDifference)