-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Complete greedy.py #22
Changes from 8 commits
fa3ecd3
f16edb6
1ebd257
c8ab713
d60bc7e
ec0b855
68793ee
7c7aea7
a58bde2
8a0acd1
b2bd902
964feea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,24 +10,28 @@ | |
explains how to have a set with a custom key. | ||
|
||
Programmer: Erel Segal-Halevi | ||
Eitan Lichtman added Minimize Distance from Avg Objective | ||
""" | ||
|
||
import math | ||
from typing import List, Tuple, Callable, Iterator, Any | ||
import numpy as np | ||
import logging, time | ||
|
||
from prtpy import objectives as obj, Binner, BinsArray | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
|
||
def anytime( | ||
binner: Binner, numbins: int, items: List[any], | ||
binner: Binner, numbins: int, items: List[any], relative_value: List[any] = None, | ||
objective: obj.Objective = obj.MinimizeDifference, | ||
use_lower_bound: bool = True, # Prune branches whose lower bound (= optimistic value) is at least as large as the current minimum. | ||
use_fast_lower_bound: bool = True, # A faster lower bound, that does not create the branch at all. Useful for min-max and max-min objectives. | ||
use_heuristic_3: bool = False, # An improved stopping condition, applicable for min-max only. Not very useful in experiments. | ||
use_set_of_seen_states: bool = True, | ||
use_lower_bound: bool = True, | ||
# Prune branches whose lower bound (= optimistic value) is at least as large as the current minimum. | ||
use_fast_lower_bound: bool = True, | ||
# A faster lower bound that does not create the branch at all. Useful for the min-max, max-min and min-dist-avg objectives. | ||
use_heuristic_3: bool = False, | ||
# An improved stopping condition, applicable for min-max only. Not very useful in experiments. | ||
use_set_of_seen_states: bool = True, | ||
time_limit: float = np.inf, | ||
) -> Iterator: | ||
""" | ||
|
@@ -41,6 +45,10 @@ def anytime( | |
Bin #0: [6, 5, 4], sum=15.0 | ||
Bin #1: [8, 7], sum=15.0 | ||
|
||
>>> printbins(anytime(BinnerKeepingContents(), 2, [4,5,6,7,8], [0.3,0.7], objective=obj.MinimizeDistAvg)) | ||
Bin #0: [5, 4], sum=9.0 | ||
Bin #1: [8, 7, 6], sum=21.0 | ||
|
||
The following examples are based on: | ||
Walter (2013), 'Comparing the minimum completion times of two longest-first scheduling-heuristics'. | ||
>>> walter_numbers = [46, 39, 27, 26, 16, 13, 10] | ||
|
@@ -56,6 +64,56 @@ def anytime( | |
Bin #0: [46, 10], sum=56.0 | ||
Bin #1: [27, 16, 13], sum=56.0 | ||
Bin #2: [39, 26], sum=65.0 | ||
>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MinimizeDistAvg)) | ||
Bin #0: [39, 16], sum=55.0 | ||
Bin #1: [46, 13], sum=59.0 | ||
Bin #2: [27, 26, 10], sum=63.0 | ||
>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers,[0.2,0.4,0.4], objective=obj.MinimizeDistAvg)) | ||
Bin #0: [27, 10], sum=37.0 | ||
Bin #1: [39, 16, 13], sum=68.0 | ||
Bin #2: [46, 26], sum=72.0 | ||
|
||
>>> printbins(anytime(BinnerKeepingContents(), 5, [460000000, 390000000, 270000000, 260000000, 160000000, 130000000, 100000000],[0.2,0.4,0.1,0.15,0.15], objective=obj.MinimizeDistAvg)) | ||
Bin #0: [390000000], sum=390000000.0 | ||
Bin #1: [460000000, 130000000, 100000000], sum=690000000.0 | ||
Bin #2: [160000000], sum=160000000.0 | ||
Bin #3: [260000000], sum=260000000.0 | ||
Bin #4: [270000000], sum=270000000.0 | ||
|
||
|
||
>>> printbins(anytime(BinnerKeepingContents(), 10, [115268834, 22638149, 35260669, 68111031, 13376625, 20835125, 179398684, 69888000, 94462800, 5100340, 27184906, 305371, 272847, 545681, 1680746, 763835, 763835], [0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1], objective=obj.MinimizeDistAvg)) | ||
Bin #0: [13376625, 5100340, 1680746, 763835, 763835, 545681, 305371, 272847], sum=22809280.0 | ||
Bin #1: [20835125], sum=20835125.0 | ||
Bin #2: [22638149], sum=22638149.0 | ||
Bin #3: [27184906], sum=27184906.0 | ||
Bin #4: [35260669], sum=35260669.0 | ||
Bin #5: [68111031], sum=68111031.0 | ||
Bin #6: [69888000], sum=69888000.0 | ||
Bin #7: [94462800], sum=94462800.0 | ||
Bin #8: [115268834], sum=115268834.0 | ||
Bin #9: [179398684], sum=179398684.0 | ||
|
||
|
||
>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers,[0.1,0.9,0], objective=obj.MinimizeDistAvg)) | ||
Bin #0: [16], sum=16.0 | ||
Bin #1: [46, 39, 27, 26, 13, 10], sum=161.0 | ||
Bin #2: [], sum=0.0 | ||
|
||
|
||
>>> printbins(anytime(BinnerKeepingContents(), 5, [2,2,5,5,5,5,9], objective=obj.MinimizeDistAvg)) | ||
Bin #0: [5], sum=5.0 | ||
Bin #1: [5], sum=5.0 | ||
Bin #2: [5, 2], sum=7.0 | ||
Bin #3: [5, 2], sum=7.0 | ||
Bin #4: [9], sum=9.0 | ||
>>> printbins(anytime(BinnerKeepingContents(), 3, [1,1,1,1], objective=obj.MinimizeDistAvg)) | ||
Bin #0: [1], sum=1.0 | ||
Bin #1: [1], sum=1.0 | ||
Bin #2: [1, 1], sum=2.0 | ||
>>> printbins(anytime(BinnerKeepingContents(), 3, [1,1,1,1,1], objective=obj.MinimizeDistAvg)) | ||
Bin #0: [1], sum=1.0 | ||
Bin #1: [1, 1], sum=2.0 | ||
Bin #2: [1, 1], sum=2.0 | ||
|
||
Compare results with and without the lower bound: | ||
>>> random_numbers = np.random.randint(1, 2**48-1, 10, dtype=np.int64) | ||
|
@@ -95,24 +153,31 @@ def anytime( | |
end_time = start_time + time_limit | ||
|
||
sorted_items = sorted(items, key=binner.valueof, reverse=True) | ||
sums_of_remaining_items = [sum(map(binner.valueof, sorted_items[i:])) for i in range(numitems)] + [0] # For Heuristic 3 | ||
sums_of_remaining_items = [sum(map(binner.valueof, sorted_items[i:])) for i in range(numitems)] + [ | ||
0] # For Heuristic 3 | ||
from prtpy import BinnerKeepingContents, BinnerKeepingSums, printbins | ||
best_bins, best_objective_value = None, np.inf | ||
|
||
global_lower_bound = objective.lower_bound(np.zeros(numbins), sums_of_remaining_items[0], are_sums_in_ascending_order=True) | ||
|
||
logger.info("\nComplete Greedy %s Partitioning of %d items into %d parts. Lower bound: %s", objective, numitems, numbins, global_lower_bound) | ||
global_lower_bound = objective.lower_bound(np.zeros(numbins), sums_of_remaining_items[0], | ||
are_sums_in_ascending_order=True) | ||
|
||
logger.info("\nComplete Greedy %s Partitioning of %d items into %d parts. Lower bound: %s", objective, numitems, | ||
numbins, global_lower_bound) | ||
|
||
# Create a stack whose elements are a partition and the current depth. | ||
# Initially, it contains a single tuple: an empty partition with depth 0. | ||
first_bins = binner.new_bins(numbins) | ||
first_bins = binner.new_bins(numbins) | ||
if (relative_value): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This part of the code should be explained in the documentation There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added explanation in the documentation. |
||
for i in range(numbins): | ||
binner.add_item_to_bin(first_bins, (max(relative_value) * sum(items) - relative_value[i] * sum(items)), i) | ||
first_vertex = (first_bins, 0) | ||
stack: List[Tuple[BinsArray, int]] = [first_vertex] | ||
if use_set_of_seen_states: | ||
seen_states = set(tuple(binner.sums(first_bins))) | ||
|
||
# For logging and profiling: | ||
complete_partitions_checked = 0 | ||
intermediate_partitions_checked = 1 | ||
complete_partitions_checked = 0 | ||
intermediate_partitions_checked = 1 | ||
|
||
times_fast_lower_bound_activated = 0 | ||
times_lower_bound_activated = 0 | ||
|
@@ -134,108 +199,129 @@ def anytime( | |
if new_objective_value < best_objective_value: | ||
best_bins, best_objective_value = current_bins, new_objective_value | ||
logger.info(" Found a better solution: %s, with value %s", current_bins, best_objective_value) | ||
if new_objective_value<=global_lower_bound: | ||
if new_objective_value <= global_lower_bound: | ||
logger.info(" Solution matches global lower bound - stopping") | ||
break | ||
continue | ||
|
||
# Heuristic 3: "If the sum of the remaining unassigned integers plus the smallest current subset sum is <= the largest subset sum, all remaining integers are assigned to the subset with the smallest sum, terminating that branch of the tree." | ||
# Note that this heuristic is valid only for the objective "minimize largest sum"! | ||
if use_heuristic_3 and objective==obj.MinimizeLargestSum: | ||
if use_heuristic_3 and objective == obj.MinimizeLargestSum: | ||
if sums_of_remaining_items[depth] + current_sums[0] <= current_sums[-1]: | ||
new_bins = binner.copy_bins(current_bins) | ||
for i in range(depth,numitems): | ||
for i in range(depth, numitems): | ||
binner.add_item_to_bin(new_bins, sorted_items[i], 0) | ||
binner.sort_by_ascending_sum(new_bins) | ||
binner.sort_by_ascending_sum(new_bins) | ||
new_depth = numitems | ||
stack.append((new_bins, new_depth)) | ||
logger.debug(" Heuristic 3 activated") | ||
times_heuristic_3_activated+=1 | ||
times_heuristic_3_activated += 1 | ||
continue | ||
|
||
next_item = sorted_items[depth] | ||
sum_of_remaining_items = sums_of_remaining_items[depth+1] | ||
sum_of_remaining_items = sums_of_remaining_items[depth + 1] | ||
|
||
previous_bin_sum = None | ||
|
||
# We want to insert the next item to the bin with the *smallest* sum first. | ||
# But, since we use a stack, we have to insert it to the bin with the *largest* sum first, | ||
# so that it is pushed deeper into the stack. | ||
# Therefore, we proceed in reverse, by *descending* order of sum. | ||
for bin_index in reversed(range(numbins)): | ||
for bin_index in reversed(range(numbins)): | ||
|
||
# Heuristic 1: "If there are two subsets with the same sum, the current number is assigned to only one." | ||
current_bin_sum = current_sums[bin_index] | ||
if current_bin_sum == previous_bin_sum: | ||
continue | ||
continue | ||
previous_bin_sum = current_bin_sum | ||
|
||
# Fast-lower-bound heuristic - before creating the new vertex. | ||
# Currently implemented only for two objectives: min-max and max-min. | ||
# Currently implemented only for three objectives: min-max, max-min and min-dist-avg. | ||
if use_fast_lower_bound: | ||
if objective==obj.MinimizeLargestSum: | ||
if objective == obj.MinimizeLargestSum: | ||
# "If an assignment to a subset creates a subset sum that equals or exceeds the largest subset sum in the best complete solution found so far, that branch is pruned from the tree." | ||
fast_lower_bound = max(current_bin_sum + binner.valueof(next_item), current_sums[-1]) | ||
elif objective==obj.MaximizeSmallestSum: | ||
elif objective == obj.MaximizeSmallestSum: | ||
# An adaptation of the above heuristic to maximizing the smallest sum. | ||
if bin_index==0: | ||
new_smallest_sum = min(current_sums[0]+binner.valueof(next_item), current_sums[1]) | ||
if bin_index == 0: | ||
new_smallest_sum = min(current_sums[0] + binner.valueof(next_item), current_sums[1]) | ||
else: | ||
new_smallest_sum = current_sums[0] | ||
fast_lower_bound = -(new_smallest_sum+sum_of_remaining_items) | ||
fast_lower_bound = -(new_smallest_sum + sum_of_remaining_items) | ||
elif objective == obj.MinimizeDistAvg: | ||
if relative_value: | ||
fast_lower_bound = 0 | ||
for i in range (numbins): | ||
fast_lower_bound = fast_lower_bound + max((current_sums[i]-(max(relative_value) * sum(items) - relative_value[i] * sum(items)))-sum(items)*relative_value[i],0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The algorithm should be explained There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added explanation. |
||
else: | ||
fast_lower_bound = 0 | ||
avg = sum(items) / numbins | ||
for i in range (numbins): | ||
fast_lower_bound = fast_lower_bound + max(current_sums[i]-avg,0) | ||
else: | ||
fast_lower_bound = -np.inf | ||
if fast_lower_bound >= best_objective_value: | ||
times_fast_lower_bound_activated += 1 | ||
continue | ||
|
||
new_bins = binner.add_item_to_bin(binner.copy_bins(current_bins), next_item, bin_index) | ||
binner.sort_by_ascending_sum(new_bins) | ||
if not relative_value: | ||
binner.sort_by_ascending_sum(new_bins) | ||
new_sums = tuple(binner.sums(new_bins)) | ||
|
||
# Lower-bound heuristic. | ||
if use_lower_bound: | ||
lower_bound = objective.lower_bound(new_sums, sum_of_remaining_items, are_sums_in_ascending_order=True) | ||
lower_bound = objective.lower_bound(new_sums, sum_of_remaining_items, are_sums_in_ascending_order=False) | ||
if lower_bound >= best_objective_value: | ||
logger.debug(" Lower bound %f too large", lower_bound) | ||
times_lower_bound_activated += 1 | ||
continue | ||
if use_set_of_seen_states: | ||
if use_set_of_seen_states: | ||
if new_sums in seen_states: | ||
logger.debug(" State %s already seen", new_sums) | ||
times_seen_state_skipped += 1 | ||
continue | ||
seen_states.add(new_sums) # should be after if use_lower_bound | ||
seen_states.add(new_sums) # should be after if use_lower_bound | ||
|
||
new_vertex = (new_bins, depth + 1) | ||
stack.append(new_vertex) | ||
intermediate_partitions_checked += 1 | ||
|
||
logger.info("Checked %d out of %d complete partitions, and %d intermediate partitions.", complete_partitions_checked, numbins**numitems, intermediate_partitions_checked) | ||
logger.info(" Heuristics: fast lower bound = %d, lower bound = %d, seen state = %d, heuristic 3 = %d.", times_fast_lower_bound_activated, times_lower_bound_activated, times_seen_state_skipped, times_heuristic_3_activated) | ||
logger.info("Checked %d out of %d complete partitions, and %d intermediate partitions.", | ||
complete_partitions_checked, numbins ** numitems, intermediate_partitions_checked) | ||
logger.info(" Heuristics: fast lower bound = %d, lower bound = %d, seen state = %d, heuristic 3 = %d.", | ||
times_fast_lower_bound_activated, times_lower_bound_activated, times_seen_state_skipped, | ||
times_heuristic_3_activated) | ||
|
||
|
||
if (relative_value): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code should be explained There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added explanation. |
||
for i in range(numbins): | ||
binner.remove_item_from_bin(best_bins, i, 0) | ||
|
||
for i in range(numbins): | ||
binner.sums(best_bins)[i] = math.floor(binner.sums(best_bins)[i]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are the sums rounded to integers? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The sums were supposed to be integers the whole time, but with relative values I needed to add a double value into each one of the bins at the beginning of the algorithm. This caused issues - for example, some values were something.00000000000001 - so I just turned them into integers after removing the double value. |
||
return best_bins | ||
|
||
|
||
if __name__ == "__main__": | ||
import doctest, sys | ||
|
||
(failures, tests) = doctest.testmod(report=True, optionflags=doctest.FAIL_FAST) | ||
print("{} failures, {} tests".format(failures, tests)) | ||
if failures>0: | ||
if failures > 0: | ||
sys.exit() | ||
|
||
# DEMO | ||
logger.setLevel(logging.INFO) | ||
logger.addHandler(logging.StreamHandler()) | ||
|
||
from prtpy import BinnerKeepingContents, BinnerKeepingSums | ||
anytime(BinnerKeepingContents(), 2, [4,5,6,7,8], objective=obj.MinimizeLargestSum) | ||
|
||
anytime(BinnerKeepingContents(), 2, [4, 5, 6, 7, 8], objective=obj.MinimizeLargestSum) | ||
|
||
walter_numbers = [46, 39, 27, 26, 16, 13, 10] | ||
anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MaximizeSmallestSum) | ||
anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MinimizeLargestSum) | ||
|
||
random_numbers = np.random.randint(1, 2**16-1, 15, dtype=np.int64) | ||
random_numbers = np.random.randint(1, 2 ** 16 - 1, 15, dtype=np.int64) | ||
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MaximizeSmallestSum) | ||
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MinimizeLargestSum) | ||
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MinimizeDifference) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There should be an explanation of the algorithm.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added an explanation for the algorithm.