@@ -102,7 +102,6 @@ Dict k v := {
102
102
metadata : List I8 ,
103
103
dataIndices : List Nat ,
104
104
data : List (k, v),
105
- size : Nat ,
106
105
} where k implements Hash & Eq
107
106
implements [
108
107
Eq {
@@ -137,19 +136,51 @@ toInspectorDict = \dict ->
137
136
fmt <- Inspect . custom
138
137
Inspect . apply (Inspect . dict dict walk Inspect . toInspector Inspect . toInspector ) fmt
139
138
139
+ emptyMetadata = [emptySlot, emptySlot, emptySlot, emptySlot, emptySlot, emptySlot, emptySlot, emptySlot]
140
+ emptyDataIndices = [0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ]
141
+
140
142
## Return an empty dictionary.
141
143
## ```
142
144
## emptyDict = Dict.empty {}
143
145
## ```
144
146
empty : {} -> Dict * *
145
147
empty = \{} ->
146
148
@Dict {
147
- metadata: List . repeat emptySlot 8 ,
148
- dataIndices: List . repeat 0 8 ,
149
+ metadata: emptyMetadata ,
150
+ dataIndices: emptyDataIndices ,
149
151
data: [],
150
- size: 0 ,
151
152
}
152
153
154
+ ## Return a dictionary with space allocated for a number of entries. This
155
+ ## may provide a performance optimization if you know how many entries will be
156
+ ## inserted.
157
+ withCapacity : Nat -> Dict * *
158
+ withCapacity = \size ->
159
+ if size == 0 then
160
+ empty {}
161
+ else
162
+ # Max load is 7/8.
163
+ # To avoid potential rehash, multiply size by 8/7.
164
+ # Then map to containing power of 2 to make dict indices happy.
165
+ cap =
166
+ size
167
+ |> Num . toU64
168
+ |> Num . mul 8
169
+ |> Num . divTrunc 7
170
+ |> containingPowerOfTwo
171
+ |> Num . max 8
172
+ |> Num . toNat
173
+
174
+ @Dict {
175
+ metadata: List . repeat emptySlot cap,
176
+ dataIndices: List . repeat 0 cap,
177
+ data: List . withCapacity cap,
178
+ }
179
+
180
+ containingPowerOfTwo : U64 -> U64
181
+ containingPowerOfTwo = \size ->
182
+ Num . shiftLeftBy 1 (64 - Num . countLeadingZeroBits (size - 1 ))
183
+
153
184
## Returns the max number of elements the dictionary can hold before requiring a rehash.
154
185
## ```
155
186
## foodDict =
@@ -164,14 +195,6 @@ capacity = \@Dict { dataIndices } ->
164
195
165
196
Num . subWrap cap (Num . shiftRightZfBy cap 3 )
166
197
167
- ## Return a dictionary with space allocated for a number of entries. This
168
- ## may provide a performance optimization if you know how many entries will be
169
- ## inserted.
170
- withCapacity : Nat -> Dict * *
171
- withCapacity = \_ ->
172
- # TODO: power of 2 * 8 and actual implementation
173
- empty {}
174
-
175
198
## Returns a dictionary containing the key and value provided as input.
176
199
## ```
177
200
## expect
@@ -193,8 +216,15 @@ single = \k, v ->
193
216
## ```
194
217
fromList : List (k, v) -> Dict k v where k implements Hash & Eq
195
218
fromList = \data ->
196
- # TODO: make this efficient. Should just set data and then set all indicies in the hashmap.
197
- List . walk data (empty {}) (\dict, (k, v) -> insert dict k v)
219
+ # TODO: make more efficient.
220
+ # Want to just set the data and then set all indices in the hashmap.
221
+ # That said, we need to also deal with duplicates.
222
+
223
+ size = List . len data
224
+ if size > 0 then
225
+ List . walk data (withCapacity size) (\dict, (k, v) -> insert dict k v)
226
+ else
227
+ empty {}
198
228
199
229
## Returns the number of values in the dictionary.
200
230
## ```
@@ -207,8 +237,8 @@ fromList = \data ->
207
237
## |> Bool.isEq 3
208
238
## ```
209
239
len : Dict * * -> Nat
210
- len = \@Dict { size } ->
211
- size
240
+ len = \@Dict { data } ->
241
+ List . len data
212
242
213
243
## Check if the dictionary is empty.
214
244
## ```
@@ -217,8 +247,8 @@ len = \@Dict { size } ->
217
247
## Dict.isEmpty (Dict.empty {})
218
248
## ```
219
249
isEmpty : Dict * * -> Bool
220
- isEmpty = \@Dict { size } ->
221
- size == 0
250
+ isEmpty = \@Dict { data } ->
251
+ List . isEmpty data
222
252
223
253
## Clears all elements from a dictionary keeping around the allocation if it isn't huge.
224
254
## ```
@@ -246,7 +276,6 @@ clear = \@Dict { metadata, dataIndices, data } ->
246
276
dataIndices,
247
277
# use takeFirst to keep around the capacity.
248
278
data: List . takeFirst data 0 ,
249
- size: 0 ,
250
279
}
251
280
252
281
## Convert each value in the dictionary to something new, by calling a conversion
@@ -424,7 +453,7 @@ contains = \@Dict { metadata, dataIndices, data }, key ->
424
453
## |> Bool.isEq (Ok 12)
425
454
## ```
426
455
insert : Dict k v, k, v -> Dict k v where k implements Hash & Eq
427
- insert = \@Dict { metadata, dataIndices, data, size }, key, value ->
456
+ insert = \@Dict { metadata, dataIndices, data }, key, value ->
428
457
hashKey =
429
458
createLowLevelHasher PseudoRandSeed
430
459
|> Hash . hash key
@@ -441,7 +470,6 @@ insert = \@Dict { metadata, dataIndices, data, size }, key, value ->
441
470
metadata,
442
471
dataIndices,
443
472
data: List . set data dataIndex (key, value),
444
- size,
445
473
}
446
474
447
475
Err NotFound ->
@@ -453,7 +481,6 @@ insert = \@Dict { metadata, dataIndices, data, size }, key, value ->
453
481
metadata,
454
482
dataIndices,
455
483
data,
456
- size: Num . addWrap size 1 ,
457
484
}
458
485
)
459
486
@@ -470,7 +497,7 @@ insert = \@Dict { metadata, dataIndices, data, size }, key, value ->
470
497
## |> Bool.isEq 0
471
498
## ```
472
499
remove : Dict k v, k -> Dict k v where k implements Hash & Eq
473
- remove = \@Dict { metadata, dataIndices, data, size }, key ->
500
+ remove = \@Dict { metadata, dataIndices, data }, key ->
474
501
# TODO: change this from swap remove to tombstone and test if performance is still good.
475
502
hashKey =
476
503
createLowLevelHasher PseudoRandSeed
@@ -490,13 +517,12 @@ remove = \@Dict { metadata, dataIndices, data, size }, key ->
490
517
metadata: List . set metadata index deletedSlot,
491
518
dataIndices,
492
519
data: List . dropLast data 1 ,
493
- size: Num . subWrap size 1 ,
494
520
}
495
521
else
496
- swapAndUpdateDataIndex (@Dict { metadata, dataIndices, data, size }) index last
522
+ swapAndUpdateDataIndex (@Dict { metadata, dataIndices, data }) index last
497
523
498
524
Err NotFound ->
499
- @Dict { metadata, dataIndices, data, size }
525
+ @Dict { metadata, dataIndices, data }
500
526
501
527
## Insert or remove a value for a specified key. This function enables a
502
528
## performance optimization for the use case of providing a default when a value
@@ -596,36 +622,49 @@ values = \@Dict { data } ->
596
622
## ```
597
623
insertAll : Dict k v, Dict k v -> Dict k v where k implements Hash & Eq
598
624
insertAll = \xs, ys ->
599
- walk ys xs insert
625
+ if len ys > len xs then
626
+ insertAll ys xs
627
+ else
628
+ walk ys xs insert
600
629
601
630
## Combine two dictionaries by keeping the [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory))
602
631
## of all the key-value pairs. This means that we keep only those pairs
603
- ## that are in both dictionaries. Note that where there are pairs with
604
- ## the same key, the value contained in the first input will be retained,
605
- ## and the value in the second input will be removed.
632
+ ## that are in both dictionaries. Both the key and value must match to be kept.
606
633
## ```
607
634
## first =
608
635
## Dict.single 1 "Keep Me"
609
636
## |> Dict.insert 2 "And Me"
637
+ ## |> Dict.insert 3 "Not this one"
610
638
##
611
639
## second =
612
640
## Dict.single 1 "Keep Me"
613
641
## |> Dict.insert 2 "And Me"
614
- ## |> Dict.insert 3 "But Not Me "
642
+ ## |> Dict.insert 3 "This has a different value "
615
643
## |> Dict.insert 4 "Or Me"
616
644
##
617
- ## expect Dict.keepShared first second == first
645
+ ## expected =
646
+ ## Dict.single 1 "Keep Me"
647
+ ## |> Dict.insert 2 "And Me"
648
+ ##
649
+ ## expect Dict.keepShared first second == expected
618
650
## ```
619
- keepShared : Dict k v, Dict k v -> Dict k v where k implements Hash & Eq
620
- keepShared = \xs, ys ->
651
+ keepShared : Dict k v, Dict k v -> Dict k v where k implements Hash & Eq , v implements Eq
652
+ keepShared = \xs0, ys0 ->
653
+ (xs1, ys1) =
654
+ if len ys0 < len xs0 then
655
+ (ys0, xs0)
656
+ else
657
+ (xs0, ys0)
621
658
walk
622
- xs
623
- (empty {} )
659
+ xs1
660
+ (withCapacity (len xs1) )
624
661
(\state, k, v ->
625
- if contains ys k then
626
- insert state k v
627
- else
628
- state
662
+ when get ys1 k is
663
+ Ok yv if v == yv ->
664
+ insert state k v
665
+
666
+ _ ->
667
+ state
629
668
)
630
669
631
670
## Remove the key-value pairs in the first input that are also in the second
@@ -653,7 +692,7 @@ removeAll = \xs, ys ->
653
692
walk ys xs (\state, k, _ -> remove state k)
654
693
655
694
swapAndUpdateDataIndex : Dict k v, Nat , Nat -> Dict k v where k implements Hash & Eq
656
- swapAndUpdateDataIndex = \@Dict { metadata, dataIndices, data, size }, removedIndex, lastIndex ->
695
+ swapAndUpdateDataIndex = \@Dict { metadata, dataIndices, data }, removedIndex, lastIndex ->
657
696
(key, _ ) = listGetUnsafe data lastIndex
658
697
hashKey =
659
698
createLowLevelHasher PseudoRandSeed
@@ -678,15 +717,14 @@ swapAndUpdateDataIndex = \@Dict { metadata, dataIndices, data, size }, removedIn
678
717
# Update index of swapped element.
679
718
dataIndices: List . set dataIndices index dataIndex,
680
719
data: nextData,
681
- size: Num . subWrap size 1 ,
682
720
}
683
721
684
722
Err NotFound ->
685
723
# This should be impossible.
686
724
crash " unreachable state in dict swapAndUpdateDataIndex hit. Definitely a standard library bug."
687
725
688
726
insertNotFoundHelper : Dict k v, k, v, U64 , I8 -> Dict k v
689
- insertNotFoundHelper = \@Dict { metadata, dataIndices, data, size }, key, value, h1Key, h2Key ->
727
+ insertNotFoundHelper = \@Dict { metadata, dataIndices, data }, key, value, h1Key, h2Key ->
690
728
probe = newProbe h1Key (div8 (List . len metadata))
691
729
index = nextEmptyOrDeletedHelper metadata probe 0
692
730
dataIndex = List . len data
@@ -696,7 +734,6 @@ insertNotFoundHelper = \@Dict { metadata, dataIndices, data, size }, key, value,
696
734
metadata: List . set metadata index h2Key,
697
735
dataIndices: List . set dataIndices index dataIndex,
698
736
data: nextData,
699
- size,
700
737
}
701
738
702
739
nextEmptyOrDeletedHelper : List I8 , Probe , Nat -> Nat
@@ -749,27 +786,26 @@ findIndexHelper = \metadata, dataIndices, data, h2Key, key, probe, offset ->
749
786
# If we aren't to the load factor yet, just ignore this.
750
787
# The container must have an updated size including any elements about to be inserted.
751
788
maybeRehash : Dict k v -> Dict k v where k implements Hash & Eq
752
- maybeRehash = \@Dict { metadata, dataIndices, data, size } ->
789
+ maybeRehash = \@Dict { metadata, dataIndices, data } ->
753
790
cap = List . len dataIndices
754
791
maxLoadCap =
755
792
# This is 7/8 * capacity, which is the max load factor.
756
793
Num . subWrap cap (Num . shiftRightZfBy cap 3 )
757
794
758
- if size > maxLoadCap then
759
- rehash (@Dict { metadata, dataIndices, data, size })
795
+ if ( List . len data + 1 ) > maxLoadCap then
796
+ rehash (@Dict { metadata, dataIndices, data })
760
797
else
761
- @Dict { metadata, dataIndices, data, size }
798
+ @Dict { metadata, dataIndices, data }
762
799
763
800
# TODO: switch rehash to iterate data and eventually clear out tombstones as well.
764
801
rehash : Dict k v -> Dict k v where k implements Hash & Eq
765
- rehash = \@Dict { metadata, dataIndices, data, size } ->
802
+ rehash = \@Dict { metadata, dataIndices, data } ->
766
803
newLen = 2 * List . len dataIndices
767
804
newDict =
768
805
@Dict {
769
806
metadata: List . repeat emptySlot newLen,
770
807
dataIndices: List . repeat 0 newLen,
771
808
data,
772
- size,
773
809
}
774
810
775
811
rehashHelper newDict metadata dataIndices data 0
@@ -796,7 +832,7 @@ rehashHelper = \dict, oldMetadata, oldDataIndices, oldData, index ->
796
832
dict
797
833
798
834
insertForRehash : Dict k v, k, Nat -> Dict k v where k implements Hash & Eq
799
- insertForRehash = \@Dict { metadata, dataIndices, data, size }, key, dataIndex ->
835
+ insertForRehash = \@Dict { metadata, dataIndices, data }, key, dataIndex ->
800
836
hashKey =
801
837
createLowLevelHasher PseudoRandSeed
802
838
|> Hash . hash key
@@ -810,7 +846,6 @@ insertForRehash = \@Dict { metadata, dataIndices, data, size }, key, dataIndex -
810
846
metadata: List . set metadata index h2Key,
811
847
dataIndices: List . set dataIndices index dataIndex,
812
848
data,
813
- size,
814
849
}
815
850
816
851
emptySlot : I8
0 commit comments