-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhash_heap.py
201 lines (171 loc) · 5.9 KB
/
hash_heap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
'''
HashHeap is a datastructure that supports O(logn) remove, O(1) top, O(logn)
pop, O(logn) push with regarding to time complexity.
Note: this version of HashHeap DOES support duplicate elements
Created on May 6, 2019
@author: tonytan4ever
'''
class HeapNode(object):
"""
The node in the HashHeap to deal with duplicates.
Each node store the value of each element and the number of duplicates
with the same value.
http://hankerzheng.com/blog/Python-Hash-Heap
"""
def __init__(self, val, cnt):
self.val = val
self.cnt = cnt
def __gt__(self, other):
return self.val > other.val
def __le__(self, other):
return self.val <= other.val
def __eq__(self, other):
return self.val == other.val
def __str__(self):
return "[%s, %d]" % (self.val, self.cnt)
__repr__ = __str__
class HashHeap(object):
"""
This HashHeap is the same as the list implementation of binary heap,
but with a hashMap to map the value of one elemnt to its index in the
list.
"""
def __init__(self, arr):
"""
`_cap` - the number of elements in the HashHeap
`_maxIdx` - the max index of the binary heap
`_data` - the list implementation of the binary heap
`_hashMap` - mapping the element to its index in the binary heap
"""
elemCnt = self._preProcess(arr)
self._cap = len(arr)
self._maxIdx = len(elemCnt) - 1
self._data = [HeapNode(key, value) for key, value in elemCnt.items()]
self._hashMap = {node.val: idx for idx, node in enumerate(self._data)}
self._heapify()
def _preProcess(self, arr):
"""
Convert the input array into a dict object.
The key to the dict is the value of the element.
The value of the dict is the occurence of each element.
"""
elemCnt = {}
for elem in arr:
elemCnt[elem] = elemCnt.get(elem, 0) + 1
return elemCnt
def _swap(self, idx1, idx2):
"""
Swap the 2 elements in the heap.
Also, change the index stored in `self._hashMap`
"""
elem1, elem2 = self._data[idx1], self._data[idx2]
self._hashMap[elem1.val] = idx2
self._hashMap[elem2.val] = idx1
self._data[idx1], self._data[idx2] = elem2, elem1
def _heapify(self):
idx = self._maxIdx
while idx > 0:
parentIdx = (idx - 1) / 2
if self._data[parentIdx] > self._data[idx]:
self._swap(parentIdx, idx)
self._siftDown(idx)
idx -= 1
def _siftDown(self, idx):
def heapValid(idx):
left, right = idx * 2 + 1, idx * 2 + 2
if left > self._maxIdx:
return True
if right > self._maxIdx:
return self._data[idx] <= self._data[left]
return (self._data[idx] <= self._data[left] and
self._data[idx] <= self._data[right])
def smallerChild(idx):
left, right = idx * 2 + 1, idx * 2 + 2
if left > self._maxIdx:
return None
if right > self._maxIdx:
return left
return left if self._data[left] < self._data[right] else right
current = idx
while not heapValid(current):
child = smallerChild(current)
self._swap(current, child)
current = child
def _siftUp(self, idx):
current = idx
parent = (current - 1) // 2
while current > 0 and self._data[parent] > self._data[current]:
self._swap(parent, current)
current = parent
parent = (current - 1) // 2
def _removeLastNode(self):
rmNode = self._data.pop(-1)
self._cap -= 1
self._maxIdx -= 1
self._hashMap.pop(rmNode.val)
def _removeByIdx(self, idx):
thisNode = self._data[idx]
retVal = thisNode.val
if thisNode.cnt > 1:
thisNode.cnt -= 1
self._cap -= 1
elif idx == self._maxIdx:
# the node itself is the last node
self._removeLastNode()
else:
self._swap(idx, self._maxIdx)
self._removeLastNode()
pidx = (idx - 1) // 2
# check to see we should sift up or sift down
if pidx >= 0 and self._data[pidx] > self._data[idx]:
self._siftUp(idx)
else:
self._siftDown(idx)
return retVal
@property
def length(self):
"""
Return the number of elements in the Hash Heap
"""
return self._cap
def heapPeep(self):
"""
Return the MIN element in the Hash Heap
"""
if not self._data:
return float("inf")
return self._data[0].val
def heapPop(self):
"""
Remove the MIN element from the Hash Heap and return its value
"""
return self._removeByIdx(0)
def heapPush(self, elem):
"""
Push a new element into the Hash Heap
"""
self._cap += 1
if elem not in self._hashMap:
self._maxIdx += 1
self._data.append(HeapNode(elem, 1))
self._hashMap[elem] = self._maxIdx
self._siftUp(self._maxIdx)
else:
idx = self._hashMap[elem]
self._data[idx].cnt += 1
def heapRemove(self, elem):
"""
Remove a existing element from the Hash Heap
If the element to be removed is not in the Hash Heap, raise an error.
"""
if elem not in self._hashMap:
raise ValueError("Element to be removed is not in HashHeap!!!")
idx = self._hashMap[elem]
self._removeByIdx(idx)
def __contains__(self, value):
return value in self._hashMap
def __str__(self):
return "%s" % [elem.val for elem in self._data]
__repr__ = __str__
if __name__ == '__main__':
pass