Skip to content

Commit 81eff6a

Browse files
authored
Merge pull request #6176 from roc-lang/set-perf
Improve perf of Dict and Set
2 parents 9889984 + 64df1c6 commit 81eff6a

File tree

11 files changed

+1072
-987
lines changed

11 files changed

+1072
-987
lines changed

crates/compiler/builtins/bitcode/src/num.zig

+6-6
Original file line numberDiff line numberDiff line change
@@ -633,26 +633,26 @@ pub fn exportMulOrPanic(comptime T: type, comptime W: type, comptime name: []con
633633

634634
pub fn exportCountLeadingZeroBits(comptime T: type, comptime name: []const u8) void {
635635
comptime var f = struct {
636-
fn func(self: T) callconv(.C) usize {
637-
return @as(usize, @clz(self));
636+
fn func(self: T) callconv(.C) u8 {
637+
return @as(u8, @clz(self));
638638
}
639639
}.func;
640640
@export(f, .{ .name = name ++ @typeName(T), .linkage = .Strong });
641641
}
642642

643643
pub fn exportCountTrailingZeroBits(comptime T: type, comptime name: []const u8) void {
644644
comptime var f = struct {
645-
fn func(self: T) callconv(.C) usize {
646-
return @as(usize, @ctz(self));
645+
fn func(self: T) callconv(.C) u8 {
646+
return @as(u8, @ctz(self));
647647
}
648648
}.func;
649649
@export(f, .{ .name = name ++ @typeName(T), .linkage = .Strong });
650650
}
651651

652652
pub fn exportCountOneBits(comptime T: type, comptime name: []const u8) void {
653653
comptime var f = struct {
654-
fn func(self: T) callconv(.C) usize {
655-
return @as(usize, @popCount(self));
654+
fn func(self: T) callconv(.C) u8 {
655+
return @as(u8, @popCount(self));
656656
}
657657
}.func;
658658
@export(f, .{ .name = name ++ @typeName(T), .linkage = .Strong });

crates/compiler/builtins/roc/Dict.roc

+87-52
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,6 @@ Dict k v := {
102102
metadata : List I8,
103103
dataIndices : List Nat,
104104
data : List (k, v),
105-
size : Nat,
106105
} where k implements Hash & Eq
107106
implements [
108107
Eq {
@@ -137,19 +136,51 @@ toInspectorDict = \dict ->
137136
fmt <- Inspect.custom
138137
Inspect.apply (Inspect.dict dict walk Inspect.toInspector Inspect.toInspector) fmt
139138

139+
emptyMetadata = [emptySlot, emptySlot, emptySlot, emptySlot, emptySlot, emptySlot, emptySlot, emptySlot]
140+
emptyDataIndices = [0, 0, 0, 0, 0, 0, 0, 0]
141+
140142
## Return an empty dictionary.
141143
## ```
142144
## emptyDict = Dict.empty {}
143145
## ```
144146
empty : {} -> Dict * *
145147
empty = \{} ->
146148
@Dict {
147-
metadata: List.repeat emptySlot 8,
148-
dataIndices: List.repeat 0 8,
149+
metadata: emptyMetadata,
150+
dataIndices: emptyDataIndices,
149151
data: [],
150-
size: 0,
151152
}
152153

154+
## Return a dictionary with space allocated for a number of entries. This
155+
## may provide a performance optimization if you know how many entries will be
156+
## inserted.
157+
withCapacity : Nat -> Dict * *
158+
withCapacity = \size ->
159+
if size == 0 then
160+
empty {}
161+
else
162+
# Max load is 7/8.
163+
# To avoid potential rehash, multiply size by 8/7.
164+
# Then map to containing power of 2 to make dict indices happy.
165+
cap =
166+
size
167+
|> Num.toU64
168+
|> Num.mul 8
169+
|> Num.divTrunc 7
170+
|> containingPowerOfTwo
171+
|> Num.max 8
172+
|> Num.toNat
173+
174+
@Dict {
175+
metadata: List.repeat emptySlot cap,
176+
dataIndices: List.repeat 0 cap,
177+
data: List.withCapacity cap,
178+
}
179+
180+
containingPowerOfTwo : U64 -> U64
181+
containingPowerOfTwo = \size ->
182+
Num.shiftLeftBy 1 (64 - Num.countLeadingZeroBits (size - 1))
183+
153184
## Returns the max number of elements the dictionary can hold before requiring a rehash.
154185
## ```
155186
## foodDict =
@@ -164,14 +195,6 @@ capacity = \@Dict { dataIndices } ->
164195

165196
Num.subWrap cap (Num.shiftRightZfBy cap 3)
166197

167-
## Return a dictionary with space allocated for a number of entries. This
168-
## may provide a performance optimization if you know how many entries will be
169-
## inserted.
170-
withCapacity : Nat -> Dict * *
171-
withCapacity = \_ ->
172-
# TODO: power of 2 * 8 and actual implementation
173-
empty {}
174-
175198
## Returns a dictionary containing the key and value provided as input.
176199
## ```
177200
## expect
@@ -193,8 +216,15 @@ single = \k, v ->
193216
## ```
194217
fromList : List (k, v) -> Dict k v where k implements Hash & Eq
195218
fromList = \data ->
196-
# TODO: make this efficient. Should just set data and then set all indicies in the hashmap.
197-
List.walk data (empty {}) (\dict, (k, v) -> insert dict k v)
219+
# TODO: make more efficient.
220+
# Want to just set the data and then set all indices in the hashmap.
221+
# That said, we need to also deal with duplicates.
222+
223+
size = List.len data
224+
if size > 0 then
225+
List.walk data (withCapacity size) (\dict, (k, v) -> insert dict k v)
226+
else
227+
empty {}
198228

199229
## Returns the number of values in the dictionary.
200230
## ```
@@ -207,8 +237,8 @@ fromList = \data ->
207237
## |> Bool.isEq 3
208238
## ```
209239
len : Dict * * -> Nat
210-
len = \@Dict { size } ->
211-
size
240+
len = \@Dict { data } ->
241+
List.len data
212242

213243
## Check if the dictionary is empty.
214244
## ```
@@ -217,8 +247,8 @@ len = \@Dict { size } ->
217247
## Dict.isEmpty (Dict.empty {})
218248
## ```
219249
isEmpty : Dict * * -> Bool
220-
isEmpty = \@Dict { size } ->
221-
size == 0
250+
isEmpty = \@Dict { data } ->
251+
List.isEmpty data
222252

223253
## Clears all elements from a dictionary keeping around the allocation if it isn't huge.
224254
## ```
@@ -246,7 +276,6 @@ clear = \@Dict { metadata, dataIndices, data } ->
246276
dataIndices,
247277
# use takeFirst to keep around the capacity.
248278
data: List.takeFirst data 0,
249-
size: 0,
250279
}
251280

252281
## Convert each value in the dictionary to something new, by calling a conversion
@@ -424,7 +453,7 @@ contains = \@Dict { metadata, dataIndices, data }, key ->
424453
## |> Bool.isEq (Ok 12)
425454
## ```
426455
insert : Dict k v, k, v -> Dict k v where k implements Hash & Eq
427-
insert = \@Dict { metadata, dataIndices, data, size }, key, value ->
456+
insert = \@Dict { metadata, dataIndices, data }, key, value ->
428457
hashKey =
429458
createLowLevelHasher PseudoRandSeed
430459
|> Hash.hash key
@@ -441,7 +470,6 @@ insert = \@Dict { metadata, dataIndices, data, size }, key, value ->
441470
metadata,
442471
dataIndices,
443472
data: List.set data dataIndex (key, value),
444-
size,
445473
}
446474

447475
Err NotFound ->
@@ -453,7 +481,6 @@ insert = \@Dict { metadata, dataIndices, data, size }, key, value ->
453481
metadata,
454482
dataIndices,
455483
data,
456-
size: Num.addWrap size 1,
457484
}
458485
)
459486

@@ -470,7 +497,7 @@ insert = \@Dict { metadata, dataIndices, data, size }, key, value ->
470497
## |> Bool.isEq 0
471498
## ```
472499
remove : Dict k v, k -> Dict k v where k implements Hash & Eq
473-
remove = \@Dict { metadata, dataIndices, data, size }, key ->
500+
remove = \@Dict { metadata, dataIndices, data }, key ->
474501
# TODO: change this from swap remove to tombstone and test if performance is still good.
475502
hashKey =
476503
createLowLevelHasher PseudoRandSeed
@@ -490,13 +517,12 @@ remove = \@Dict { metadata, dataIndices, data, size }, key ->
490517
metadata: List.set metadata index deletedSlot,
491518
dataIndices,
492519
data: List.dropLast data 1,
493-
size: Num.subWrap size 1,
494520
}
495521
else
496-
swapAndUpdateDataIndex (@Dict { metadata, dataIndices, data, size }) index last
522+
swapAndUpdateDataIndex (@Dict { metadata, dataIndices, data }) index last
497523

498524
Err NotFound ->
499-
@Dict { metadata, dataIndices, data, size }
525+
@Dict { metadata, dataIndices, data }
500526

501527
## Insert or remove a value for a specified key. This function enables a
502528
## performance optimization for the use case of providing a default when a value
@@ -596,36 +622,49 @@ values = \@Dict { data } ->
596622
## ```
597623
insertAll : Dict k v, Dict k v -> Dict k v where k implements Hash & Eq
598624
insertAll = \xs, ys ->
599-
walk ys xs insert
625+
if len ys > len xs then
626+
insertAll ys xs
627+
else
628+
walk ys xs insert
600629

601630
## Combine two dictionaries by keeping the [intersection](https://en.wikipedia.org/wiki/Intersection_(set_theory))
602631
## of all the key-value pairs. This means that we keep only those pairs
603-
## that are in both dictionaries. Note that where there are pairs with
604-
## the same key, the value contained in the first input will be retained,
605-
## and the value in the second input will be removed.
632+
## that are in both dictionaries. Both the key and value must match to be kept.
606633
## ```
607634
## first =
608635
## Dict.single 1 "Keep Me"
609636
## |> Dict.insert 2 "And Me"
637+
## |> Dict.insert 3 "Not this one"
610638
##
611639
## second =
612640
## Dict.single 1 "Keep Me"
613641
## |> Dict.insert 2 "And Me"
614-
## |> Dict.insert 3 "But Not Me"
642+
## |> Dict.insert 3 "This has a different value"
615643
## |> Dict.insert 4 "Or Me"
616644
##
617-
## expect Dict.keepShared first second == first
645+
## expected =
646+
## Dict.single 1 "Keep Me"
647+
## |> Dict.insert 2 "And Me"
648+
##
649+
## expect Dict.keepShared first second == expected
618650
## ```
619-
keepShared : Dict k v, Dict k v -> Dict k v where k implements Hash & Eq
620-
keepShared = \xs, ys ->
651+
keepShared : Dict k v, Dict k v -> Dict k v where k implements Hash & Eq, v implements Eq
652+
keepShared = \xs0, ys0 ->
653+
(xs1, ys1) =
654+
if len ys0 < len xs0 then
655+
(ys0, xs0)
656+
else
657+
(xs0, ys0)
621658
walk
622-
xs
623-
(empty {})
659+
xs1
660+
(withCapacity (len xs1))
624661
(\state, k, v ->
625-
if contains ys k then
626-
insert state k v
627-
else
628-
state
662+
when get ys1 k is
663+
Ok yv if v == yv ->
664+
insert state k v
665+
666+
_ ->
667+
state
629668
)
630669

631670
## Remove the key-value pairs in the first input that are also in the second
@@ -653,7 +692,7 @@ removeAll = \xs, ys ->
653692
walk ys xs (\state, k, _ -> remove state k)
654693

655694
swapAndUpdateDataIndex : Dict k v, Nat, Nat -> Dict k v where k implements Hash & Eq
656-
swapAndUpdateDataIndex = \@Dict { metadata, dataIndices, data, size }, removedIndex, lastIndex ->
695+
swapAndUpdateDataIndex = \@Dict { metadata, dataIndices, data }, removedIndex, lastIndex ->
657696
(key, _) = listGetUnsafe data lastIndex
658697
hashKey =
659698
createLowLevelHasher PseudoRandSeed
@@ -678,15 +717,14 @@ swapAndUpdateDataIndex = \@Dict { metadata, dataIndices, data, size }, removedIn
678717
# Update index of swapped element.
679718
dataIndices: List.set dataIndices index dataIndex,
680719
data: nextData,
681-
size: Num.subWrap size 1,
682720
}
683721

684722
Err NotFound ->
685723
# This should be impossible.
686724
crash "unreachable state in dict swapAndUpdateDataIndex hit. Definitely a standard library bug."
687725

688726
insertNotFoundHelper : Dict k v, k, v, U64, I8 -> Dict k v
689-
insertNotFoundHelper = \@Dict { metadata, dataIndices, data, size }, key, value, h1Key, h2Key ->
727+
insertNotFoundHelper = \@Dict { metadata, dataIndices, data }, key, value, h1Key, h2Key ->
690728
probe = newProbe h1Key (div8 (List.len metadata))
691729
index = nextEmptyOrDeletedHelper metadata probe 0
692730
dataIndex = List.len data
@@ -696,7 +734,6 @@ insertNotFoundHelper = \@Dict { metadata, dataIndices, data, size }, key, value,
696734
metadata: List.set metadata index h2Key,
697735
dataIndices: List.set dataIndices index dataIndex,
698736
data: nextData,
699-
size,
700737
}
701738

702739
nextEmptyOrDeletedHelper : List I8, Probe, Nat -> Nat
@@ -749,27 +786,26 @@ findIndexHelper = \metadata, dataIndices, data, h2Key, key, probe, offset ->
749786
# If we aren't to the load factor yet, just ignore this.
750787
# The check compares `List.len data + 1` against the max load, so it already accounts for the one element about to be inserted.
751788
maybeRehash : Dict k v -> Dict k v where k implements Hash & Eq
752-
maybeRehash = \@Dict { metadata, dataIndices, data, size } ->
789+
maybeRehash = \@Dict { metadata, dataIndices, data } ->
753790
cap = List.len dataIndices
754791
maxLoadCap =
755792
# This is 7/8 * capacity, which is the max load factor.
756793
Num.subWrap cap (Num.shiftRightZfBy cap 3)
757794

758-
if size > maxLoadCap then
759-
rehash (@Dict { metadata, dataIndices, data, size })
795+
if (List.len data + 1) > maxLoadCap then
796+
rehash (@Dict { metadata, dataIndices, data })
760797
else
761-
@Dict { metadata, dataIndices, data, size }
798+
@Dict { metadata, dataIndices, data }
762799

763800
# TODO: switch rehash to iterate data and eventually clear out tombstones as well.
764801
rehash : Dict k v -> Dict k v where k implements Hash & Eq
765-
rehash = \@Dict { metadata, dataIndices, data, size } ->
802+
rehash = \@Dict { metadata, dataIndices, data } ->
766803
newLen = 2 * List.len dataIndices
767804
newDict =
768805
@Dict {
769806
metadata: List.repeat emptySlot newLen,
770807
dataIndices: List.repeat 0 newLen,
771808
data,
772-
size,
773809
}
774810

775811
rehashHelper newDict metadata dataIndices data 0
@@ -796,7 +832,7 @@ rehashHelper = \dict, oldMetadata, oldDataIndices, oldData, index ->
796832
dict
797833

798834
insertForRehash : Dict k v, k, Nat -> Dict k v where k implements Hash & Eq
799-
insertForRehash = \@Dict { metadata, dataIndices, data, size }, key, dataIndex ->
835+
insertForRehash = \@Dict { metadata, dataIndices, data }, key, dataIndex ->
800836
hashKey =
801837
createLowLevelHasher PseudoRandSeed
802838
|> Hash.hash key
@@ -810,7 +846,6 @@ insertForRehash = \@Dict { metadata, dataIndices, data, size }, key, dataIndex -
810846
metadata: List.set metadata index h2Key,
811847
dataIndices: List.set dataIndices index dataIndex,
812848
data,
813-
size,
814849
}
815850

816851
emptySlot : I8

crates/compiler/builtins/roc/Num.roc

+3-3
Original file line numberDiff line numberDiff line change
@@ -1112,7 +1112,7 @@ powInt : Int a, Int a -> Int a
11121112
##
11131113
## 8
11141114
## ```
1115-
countLeadingZeroBits : Int a -> Nat
1115+
countLeadingZeroBits : Int a -> U8
11161116

11171117
## Counts the number of least-significant (trailing in a big-Endian sense) zeroes in an integer.
11181118
##
@@ -1125,7 +1125,7 @@ countLeadingZeroBits : Int a -> Nat
11251125
##
11261126
## 8
11271127
## ```
1128-
countTrailingZeroBits : Int a -> Nat
1128+
countTrailingZeroBits : Int a -> U8
11291129

11301130
## Counts the number of set bits in an integer.
11311131
##
@@ -1138,7 +1138,7 @@ countTrailingZeroBits : Int a -> Nat
11381138
##
11391139
## 0
11401140
## ```
1141-
countOneBits : Int a -> Nat
1141+
countOneBits : Int a -> U8
11421142

11431143
addWrap : Int range, Int range -> Int range
11441144

crates/compiler/builtins/roc/Set.roc

+4-3
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,10 @@ toList = \@Set dict ->
237237
## ```
238238
fromList : List k -> Set k where k implements Hash & Eq
239239
fromList = \list ->
240-
initial = @Set (Dict.withCapacity (List.len list))
241-
242-
List.walk list initial insert
240+
list
241+
|> List.map \k -> (k, {})
242+
|> Dict.fromList
243+
|> @Set
243244

244245
## Combine two `Set` collections by keeping the
245246
## [union](https://en.wikipedia.org/wiki/Union_(set_theory))

0 commit comments

Comments
 (0)