Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complete greedy.py #22

Merged
merged 12 commits into from
May 15, 2024
10 changes: 9 additions & 1 deletion prtpy/binners.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,13 @@ def add_item_to_bin(self, bins:BinsArray, item: Any, bin_index: int)->BinsArray:
Return the bins after the addition.
"""
return bins


def remove_item_from_bin(self, bins: "BinsArray", bin_index: int, item_index: int) -> "BinsArray":
    """
    Remove the item at position `item_index` from the bin at `bin_index`,
    and subtract that item's value from the bin's sum.

    The bins are modified in place, and the same `bins` object is returned.

    NOTE(review): `bins` is unpacked as a pair `(sums, lists)`, so this
    presumably fits only binners that keep the bin contents -- confirm
    before calling it on a binner with a different bins structure.

    :param bins: a pair (sums, lists); `lists[i]` holds the items of bin i, `sums[i]` holds their total value.
    :param bin_index: index of the bin to remove from.
    :param item_index: position of the item inside `lists[bin_index]`.
    :return: the given `bins`, after the removal.
    """
    sums, lists = bins
    item = lists[bin_index][item_index]
    # Subtract the item's *value*, not the raw item: elsewhere the value of an
    # item is always obtained through `valueof`, and the two differ whenever
    # the items are not plain numbers.
    sums[bin_index] -= self.valueof(item)
    del lists[bin_index][item_index]
    return bins

@abstractmethod
def sort_by_ascending_sum(self, bins:BinsArray):
Expand All @@ -99,6 +105,8 @@ def numbins(self, bins: BinsArray) -> int:
"""
return None



@abstractmethod
def sums(self, bins: BinsArray) -> Tuple[float]:
"""
Expand Down
39 changes: 38 additions & 1 deletion prtpy/objectives.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ def lower_bound(self, sums:list, sum_of_remaining_items:float, are_sums_in_ascen
25.0
>>> MinimizeDifference.lower_bound([10,20,30,40,50], sum_of_remaining_items=45)
15.0

>>> MinimizeDifference.lower_bound([10,20,30,40,50], sum_of_remaining_items=200)
0.0
>>> MinimizeDifference.lower_bound([0,0,0,0,0], sum_of_remaining_items=54)
Expand All @@ -156,6 +155,44 @@ def lower_bound(self, sums:list, sum_of_remaining_items:float, are_sums_in_ascen



class MinimizeTheDistanceFromAvg(Objective):
    """
    Objective: minimize the total amount by which the bin sums exceed the average bin sum.

    Only bins whose sum is above the average contribute to the value;
    bins at or below the average contribute nothing.
    """

    def value_to_minimize(self, sums:list, are_sums_in_ascending_order:bool=False)->float:
        """
        Return the sum, over all bins that are above the average, of (bin sum - average).
        The parameter `are_sums_in_ascending_order` is accepted but not used.

        >>> MinimizeDistAvg.value_to_minimize([10,20,30,40,50])
        30.0
        """
        avg = sum(sums) / len(sums)
        diff_from_avg = 0
        for s in sums:
            if s > avg:
                diff_from_avg = diff_from_avg + (s - avg)
        return diff_from_avg

    def __str__(self) -> str:
        return "minimize-the-distance-from-avg"

    def lower_bound(self, sums:list, sum_of_remaining_items:float, are_sums_in_ascending_order:bool=False)->float:
        """
        Return an optimistic estimate of the objective value, given the current
        bin sums and the total value of the items that are not yet allocated.
        The parameter `are_sums_in_ascending_order` is accepted but not used.

        How it is computed:
        1. The final average is already known: it is the sum of the current
           bin sums plus the remaining items, divided by the number of bins.
        2. Every bin that is already above this final average adds its excess
           (bin sum - average) to the bound.
        3. The remaining value is "poured" into the bins that are below the
           average, in the order in which the sums are given. Each such bin
           absorbs at most the integer part of its gap to the average.
        4. Whatever is left after all bins were visited is taken modulo the
           number of bins, divided by the number of bins, and added to the bound.

        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=5)
        28.0
        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=20)
        22.0
        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=45)
        12.0

        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=200)
        0.0
        >>> MinimizeDistAvg.lower_bound([0,0,0,0,0], sum_of_remaining_items=54)
        0.8
        """
        remaining = sum_of_remaining_items
        avg = (sum(sums) + remaining) / len(sums)
        diff_from_avg = 0
        for s in sums:
            if s < avg and remaining > 0:
                # Fill this bin towards the average; int() drops the fractional part of the gap.
                remaining = remaining - min(remaining, int(avg - s))
            if s > avg:
                diff_from_avg = diff_from_avg + (s - avg)
        # Leftover value that the below-average bins could not absorb.
        return diff_from_avg + ((remaining % len(sums)) / len(sums))

# Module-level instance of the objective; doctests and callers refer to it as `MinimizeDistAvg`.
MinimizeDistAvg = MinimizeTheDistanceFromAvg()



if __name__ == "__main__":
import doctest

Expand Down
166 changes: 126 additions & 40 deletions prtpy/partitioning/complete_greedy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,28 @@
explains how to have a set with a custom key.

Programmer: Erel Segal-Halevi
Eitan Lichtman added Minimize Distance from Avg Objective
"""

import math
from typing import List, Tuple, Callable, Iterator, Any
import numpy as np
import logging, time

from prtpy import objectives as obj, Binner, BinsArray

logger = logging.getLogger(__name__)



def anytime(
binner: Binner, numbins: int, items: List[any],
binner: Binner, numbins: int, items: List[any], relative_value: List[any] = None,
objective: obj.Objective = obj.MinimizeDifference,
use_lower_bound: bool = True, # Prune branches whose lower bound (= optimistic value) is at least as large as the current minimum.
use_fast_lower_bound: bool = True, # A faster lower bound, that does not create the branch at all. Useful for min-max and max-min objectives.
use_heuristic_3: bool = False, # An improved stopping condition, applicable for min-max only. Not very useful in experiments.
use_set_of_seen_states: bool = True,
use_lower_bound: bool = True,
# Prune branches whose lower bound (= optimistic value) is at least as large as the current minimum.
use_fast_lower_bound: bool = True,
# A faster lower bound, that does not create the branch at all. Useful for the min-max, max-min and min-dist-avg objectives.
use_heuristic_3: bool = False,
# An improved stopping condition, applicable for min-max only. Not very useful in experiments.
use_set_of_seen_states: bool = True,
time_limit: float = np.inf,
) -> Iterator:
"""
Expand All @@ -41,6 +45,10 @@ def anytime(
Bin #0: [6, 5, 4], sum=15.0
Bin #1: [8, 7], sum=15.0

>>> printbins(anytime(BinnerKeepingContents(), 2, [4,5,6,7,8], [0.3,0.7], objective=obj.MinimizeDistAvg))
Bin #0: [5, 4], sum=9.0
Bin #1: [8, 7, 6], sum=21.0

The following examples are based on:
Walter (2013), 'Comparing the minimum completion times of two longest-first scheduling-heuristics'.
>>> walter_numbers = [46, 39, 27, 26, 16, 13, 10]
Expand All @@ -56,6 +64,56 @@ def anytime(
Bin #0: [46, 10], sum=56.0
Bin #1: [27, 16, 13], sum=56.0
Bin #2: [39, 26], sum=65.0
>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MinimizeDistAvg))
Bin #0: [39, 16], sum=55.0
Bin #1: [46, 13], sum=59.0
Bin #2: [27, 26, 10], sum=63.0
>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers,[0.2,0.4,0.4], objective=obj.MinimizeDistAvg))
Bin #0: [27, 10], sum=37.0
Bin #1: [39, 16, 13], sum=68.0
Bin #2: [46, 26], sum=72.0

>>> printbins(anytime(BinnerKeepingContents(), 5, [460000000, 390000000, 270000000, 260000000, 160000000, 130000000, 100000000],[0.2,0.4,0.1,0.15,0.15], objective=obj.MinimizeDistAvg))
Bin #0: [390000000], sum=390000000.0
Bin #1: [460000000, 130000000, 100000000], sum=690000000.0
Bin #2: [160000000], sum=160000000.0
Bin #3: [260000000], sum=260000000.0
Bin #4: [270000000], sum=270000000.0


>>> printbins(anytime(BinnerKeepingContents(), 10, [115268834, 22638149, 35260669, 68111031, 13376625, 20835125, 179398684, 69888000, 94462800, 5100340, 27184906, 305371, 272847, 545681, 1680746, 763835, 763835], [0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1], objective=obj.MinimizeDistAvg))
Bin #0: [13376625, 5100340, 1680746, 763835, 763835, 545681, 305371, 272847], sum=22809280.0
Bin #1: [20835125], sum=20835125.0
Bin #2: [22638149], sum=22638149.0
Bin #3: [27184906], sum=27184906.0
Bin #4: [35260669], sum=35260669.0
Bin #5: [68111031], sum=68111031.0
Bin #6: [69888000], sum=69888000.0
Bin #7: [94462800], sum=94462800.0
Bin #8: [115268834], sum=115268834.0
Bin #9: [179398684], sum=179398684.0


>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers,[0.1,0.9,0], objective=obj.MinimizeDistAvg))
Bin #0: [16], sum=16.0
Bin #1: [46, 39, 27, 26, 13, 10], sum=161.0
Bin #2: [], sum=0.0


>>> printbins(anytime(BinnerKeepingContents(), 5, [2,2,5,5,5,5,9], objective=obj.MinimizeDistAvg))
Bin #0: [5], sum=5.0
Bin #1: [5], sum=5.0
Bin #2: [5, 2], sum=7.0
Bin #3: [5, 2], sum=7.0
Bin #4: [9], sum=9.0
>>> printbins(anytime(BinnerKeepingContents(), 3, [1,1,1,1], objective=obj.MinimizeDistAvg))
Bin #0: [1], sum=1.0
Bin #1: [1], sum=1.0
Bin #2: [1, 1], sum=2.0
>>> printbins(anytime(BinnerKeepingContents(), 3, [1,1,1,1,1], objective=obj.MinimizeDistAvg))
Bin #0: [1], sum=1.0
Bin #1: [1, 1], sum=2.0
Bin #2: [1, 1], sum=2.0

Compare results with and without the lower bound:
>>> random_numbers = np.random.randint(1, 2**48-1, 10, dtype=np.int64)
Expand Down Expand Up @@ -95,24 +153,31 @@ def anytime(
end_time = start_time + time_limit

sorted_items = sorted(items, key=binner.valueof, reverse=True)
sums_of_remaining_items = [sum(map(binner.valueof, sorted_items[i:])) for i in range(numitems)] + [0] # For Heuristic 3
sums_of_remaining_items = [sum(map(binner.valueof, sorted_items[i:])) for i in range(numitems)] + [
0] # For Heuristic 3
from prtpy import BinnerKeepingContents, BinnerKeepingSums, printbins
best_bins, best_objective_value = None, np.inf

global_lower_bound = objective.lower_bound(np.zeros(numbins), sums_of_remaining_items[0], are_sums_in_ascending_order=True)

logger.info("\nComplete Greedy %s Partitioning of %d items into %d parts. Lower bound: %s", objective, numitems, numbins, global_lower_bound)
global_lower_bound = objective.lower_bound(np.zeros(numbins), sums_of_remaining_items[0],
are_sums_in_ascending_order=True)

logger.info("\nComplete Greedy %s Partitioning of %d items into %d parts. Lower bound: %s", objective, numitems,
numbins, global_lower_bound)

# Create a stack whose elements are a partition and the current depth.
# Initially, it contains a single tuple: an empty partition with depth 0.
first_bins = binner.new_bins(numbins)
first_bins = binner.new_bins(numbins)
if (relative_value):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part of the code should be explained in the documentation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added explanation in the documentation.

for i in range(numbins):
binner.add_item_to_bin(first_bins, (max(relative_value) * sum(items) - relative_value[i] * sum(items)), i)
first_vertex = (first_bins, 0)
stack: List[Tuple[BinsArray, int]] = [first_vertex]
if use_set_of_seen_states:
seen_states = set(tuple(binner.sums(first_bins)))

# For logging and profiling:
complete_partitions_checked = 0
intermediate_partitions_checked = 1
complete_partitions_checked = 0
intermediate_partitions_checked = 1

times_fast_lower_bound_activated = 0
times_lower_bound_activated = 0
Expand All @@ -134,108 +199,129 @@ def anytime(
if new_objective_value < best_objective_value:
best_bins, best_objective_value = current_bins, new_objective_value
logger.info(" Found a better solution: %s, with value %s", current_bins, best_objective_value)
if new_objective_value<=global_lower_bound:
if new_objective_value <= global_lower_bound:
logger.info(" Solution matches global lower bound - stopping")
break
continue

# Heuristic 3: "If the sum of the remaining unassigned integers plus the smallest current subset sum is <= the largest subset sum, all remaining integers are assigned to the subset with the smallest sum, terminating that branch of the tree."
# Note that this heuristic is valid only for the objective "minimize largest sum"!
if use_heuristic_3 and objective==obj.MinimizeLargestSum:
if use_heuristic_3 and objective == obj.MinimizeLargestSum:
if sums_of_remaining_items[depth] + current_sums[0] <= current_sums[-1]:
new_bins = binner.copy_bins(current_bins)
for i in range(depth,numitems):
for i in range(depth, numitems):
binner.add_item_to_bin(new_bins, sorted_items[i], 0)
binner.sort_by_ascending_sum(new_bins)
binner.sort_by_ascending_sum(new_bins)
new_depth = numitems
stack.append((new_bins, new_depth))
logger.debug(" Heuristic 3 activated")
times_heuristic_3_activated+=1
times_heuristic_3_activated += 1
continue

next_item = sorted_items[depth]
sum_of_remaining_items = sums_of_remaining_items[depth+1]
sum_of_remaining_items = sums_of_remaining_items[depth + 1]

previous_bin_sum = None

# We want to insert the next item to the bin with the *smallest* sum first.
# But, since we use a stack, we have to insert it to the bin with the *largest* sum first,
# so that it is pushed deeper into the stack.
# Therefore, we proceed in reverse, by *descending* order of sum.
for bin_index in reversed(range(numbins)):
for bin_index in reversed(range(numbins)):

# Heuristic 1: "If there are two subsets with the same sum, the current number is assigned to only one."
current_bin_sum = current_sums[bin_index]
if current_bin_sum == previous_bin_sum:
continue
continue
previous_bin_sum = current_bin_sum

# Fast-lower-bound heuristic - before creating the new vertex.
# Currently implemented only for two objectives: min-max and max-min.
# Currently implemented only for three objectives: min-max, max-min and min-dist-avg.
if use_fast_lower_bound:
if objective==obj.MinimizeLargestSum:
if objective == obj.MinimizeLargestSum:
# "If an assignment to a subset creates a subset sum that equals or exceeds the largest subset sum in the best complete solution found so far, that branch is pruned from the tree."
fast_lower_bound = max(current_bin_sum + binner.valueof(next_item), current_sums[-1])
elif objective==obj.MaximizeSmallestSum:
elif objective == obj.MaximizeSmallestSum:
# An adaptation of the above heuristic to maximizing the smallest sum.
if bin_index==0:
new_smallest_sum = min(current_sums[0]+binner.valueof(next_item), current_sums[1])
if bin_index == 0:
new_smallest_sum = min(current_sums[0] + binner.valueof(next_item), current_sums[1])
else:
new_smallest_sum = current_sums[0]
fast_lower_bound = -(new_smallest_sum+sum_of_remaining_items)
fast_lower_bound = -(new_smallest_sum + sum_of_remaining_items)
elif objective == obj.MinimizeDistAvg:
if relative_value:
fast_lower_bound = 0
for i in range (numbins):
fast_lower_bound = fast_lower_bound + max((current_sums[i]-(max(relative_value) * sum(items) - relative_value[i] * sum(items)))-sum(items)*relative_value[i],0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The algorithm should be explained

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added explanation.

else:
fast_lower_bound = 0
avg = sum(items) / numbins
for i in range (numbins):
fast_lower_bound = fast_lower_bound + max(current_sums[i]-avg,0)
else:
fast_lower_bound = -np.inf
if fast_lower_bound >= best_objective_value:
times_fast_lower_bound_activated += 1
continue

new_bins = binner.add_item_to_bin(binner.copy_bins(current_bins), next_item, bin_index)
binner.sort_by_ascending_sum(new_bins)
if not relative_value:
binner.sort_by_ascending_sum(new_bins)
new_sums = tuple(binner.sums(new_bins))

# Lower-bound heuristic.
if use_lower_bound:
lower_bound = objective.lower_bound(new_sums, sum_of_remaining_items, are_sums_in_ascending_order=True)
lower_bound = objective.lower_bound(new_sums, sum_of_remaining_items, are_sums_in_ascending_order=False)
if lower_bound >= best_objective_value:
logger.debug(" Lower bound %f too large", lower_bound)
times_lower_bound_activated += 1
continue
if use_set_of_seen_states:
if use_set_of_seen_states:
if new_sums in seen_states:
logger.debug(" State %s already seen", new_sums)
times_seen_state_skipped += 1
continue
seen_states.add(new_sums) # should be after if use_lower_bound
seen_states.add(new_sums) # should be after if use_lower_bound

new_vertex = (new_bins, depth + 1)
stack.append(new_vertex)
intermediate_partitions_checked += 1

logger.info("Checked %d out of %d complete partitions, and %d intermediate partitions.", complete_partitions_checked, numbins**numitems, intermediate_partitions_checked)
logger.info(" Heuristics: fast lower bound = %d, lower bound = %d, seen state = %d, heuristic 3 = %d.", times_fast_lower_bound_activated, times_lower_bound_activated, times_seen_state_skipped, times_heuristic_3_activated)
logger.info("Checked %d out of %d complete partitions, and %d intermediate partitions.",
complete_partitions_checked, numbins ** numitems, intermediate_partitions_checked)
logger.info(" Heuristics: fast lower bound = %d, lower bound = %d, seen state = %d, heuristic 3 = %d.",
times_fast_lower_bound_activated, times_lower_bound_activated, times_seen_state_skipped,
times_heuristic_3_activated)


if (relative_value):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code should be explained

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added explanation.

for i in range(numbins):
binner.remove_item_from_bin(best_bins, i, 0)

for i in range(numbins):
binner.sums(best_bins)[i] = math.floor(binner.sums(best_bins)[i])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are the sums rounded to integers?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sums were supposed to be integers the whole time, but with relative values I needed to add a double (floating-point) value to each of the bins at the beginning of the algorithm. This caused issues: for example, some sums came out as something like x.00000000000001, so I just turned them back into integers after removing the double value.

return best_bins


if __name__ == "__main__":
import doctest, sys

(failures, tests) = doctest.testmod(report=True, optionflags=doctest.FAIL_FAST)
print("{} failures, {} tests".format(failures, tests))
if failures>0:
if failures > 0:
sys.exit()

# DEMO
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())

from prtpy import BinnerKeepingContents, BinnerKeepingSums
anytime(BinnerKeepingContents(), 2, [4,5,6,7,8], objective=obj.MinimizeLargestSum)

anytime(BinnerKeepingContents(), 2, [4, 5, 6, 7, 8], objective=obj.MinimizeLargestSum)

walter_numbers = [46, 39, 27, 26, 16, 13, 10]
anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MaximizeSmallestSum)
anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MinimizeLargestSum)

random_numbers = np.random.randint(1, 2**16-1, 15, dtype=np.int64)
random_numbers = np.random.randint(1, 2 ** 16 - 1, 15, dtype=np.int64)
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MaximizeSmallestSum)
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MinimizeLargestSum)
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MinimizeDifference)
Loading