Skip to content

Use combinatorial design for sort benchmarks #298

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 121 additions & 2 deletions Manifest.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# This file is machine-generated - editing it directly is not advised

[[Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"

[[Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"

Expand All @@ -9,6 +12,45 @@ git-tree-sha1 = "61adeb0823084487000600ef8b1c00cc2474cd47"
uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
version = "1.2.0"

[[ChainRulesCore]]
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
git-tree-sha1 = "80ca332f6dcb2508adba68f22f551adb2d00a624"
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
version = "1.15.3"

[[ChangesOfVariables]]
deps = ["ChainRulesCore", "LinearAlgebra", "Test"]
git-tree-sha1 = "38f7a08f19d8810338d4f5085211c7dfa5d5bdd8"
uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
version = "0.1.4"

[[Combinatorics]]
git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860"
uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
version = "1.0.2"

[[Compat]]
deps = ["Dates", "LinearAlgebra", "UUIDs"]
git-tree-sha1 = "924cdca592bc16f14d2f7006754a621735280b74"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "4.1.0"

[[CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
version = "0.5.2+0"

[[DataAPI]]
git-tree-sha1 = "fb5f5316dd3fd4c5e7c30a24d50643b73e37cd40"
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
version = "1.10.0"

[[DataStructures]]
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "d1fff3a548102f48987a52a2e0d114fa97d730f0"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.18.13"

[[Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand All @@ -17,33 +59,80 @@ uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"

[[DocStringExtensions]]
deps = ["LibGit2"]
git-tree-sha1 = "5158c2b41018c5f7eb1470d558127ac274eca0c9"
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
version = "0.9.1"

[[InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[[InverseFunctions]]
deps = ["Test"]
git-tree-sha1 = "b3364212fb5d870f724876ffcd34dd8ec6d98918"
uuid = "3587e190-3f89-42d0-90ee-14403ec27112"
version = "0.1.7"

[[IrrationalConstants]]
git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151"
uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
version = "0.1.1"

[[JSON]]
deps = ["Dates", "Mmap", "Parsers", "Unicode"]
git-tree-sha1 = "8076680b162ada2a031f707ac7b4953e30667a37"
uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
version = "0.21.2"

[[LibGit2]]
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"

[[Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

[[LinearAlgebra]]
deps = ["Libdl"]
deps = ["Libdl", "libblastrampoline_jll"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

[[LogExpFunctions]]
deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"]
git-tree-sha1 = "361c2b088575b07946508f135ac556751240091c"
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
version = "0.3.17"

[[Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"

[[Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"

[[Missings]]
deps = ["DataAPI"]
git-tree-sha1 = "bf210ce90b6c9eed32d25dbcae1ebc565df2687f"
uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
version = "1.0.2"

[[Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"

[[NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
version = "1.2.0"

[[OpenBLAS_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
version = "0.3.20+0"

[[OrderedCollections]]
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.4.1"

[[Parsers]]
deps = ["Dates"]
git-tree-sha1 = "ae4bbcadb2906ccc085cf52ac286dc1377dceccc"
Expand All @@ -63,18 +152,25 @@ deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"

[[Random]]
deps = ["Serialization"]
deps = ["SHA", "Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[[SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
version = "0.7.0"

[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"

[[Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"

[[SortingAlgorithms]]
deps = ["DataStructures"]
git-tree-sha1 = "b3363d7460f7d098ca0912c69b082f75625d7508"
uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
version = "1.0.1"

[[SparseArrays]]
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Expand All @@ -89,6 +185,18 @@ version = "1.0.0"
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[[StatsAPI]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "f9af7f195fb13589dd2e2d57fdb401717d2eb1f6"
uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
version = "1.5.0"

[[StatsBase]]
deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"]
git-tree-sha1 = "d1bf48bfcc554a3761a133fe3a9bb01488e06916"
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
version = "0.33.21"

[[Test]]
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand All @@ -99,3 +207,14 @@ uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[[Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[[UnitTestDesign]]
deps = ["Combinatorics", "Random"]
git-tree-sha1 = "68c148902cbe720d5d5d004fcc06ebc57db2fdbe"
uuid = "239896fa-e45a-40e8-9993-3c434b0bc450"
version = "0.3.0"

[[libblastrampoline_jll]]
deps = ["Artifacts", "Libdl", "OpenBLAS_jll"]
uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
version = "5.1.1+0"
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
UnitTestDesign = "239896fa-e45a-40e8-9993-3c434b0bc450"

[compat]
BenchmarkTools = "1"
Expand Down
154 changes: 118 additions & 36 deletions src/sort/SortBenchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,51 +4,133 @@ include(joinpath(dirname(@__FILE__), "..", "utils", "RandUtils.jl"))

using .RandUtils
using BenchmarkTools
using StatsBase: sample
using UnitTestDesign: all_pairs
using Base.Order
using Random

const SUITE = BenchmarkGroup()
const LIST_SIZE = 50000
const LISTS = (
("ascending", collect(1:LIST_SIZE)),
("descending", collect(LIST_SIZE:-1:1)),
("ones", ones(LIST_SIZE)),
("random", samerand(LIST_SIZE))
)

#####################################
# QuickSort/MergeSort/InsertionSort #
#####################################

for (group, Alg) in (("quicksort", QuickSort), ("mergesort", MergeSort), ("insertionsort", InsertionSort))
g = addgroup!(SUITE, group)
for (kind, list) in LISTS
ix = collect(1:length(list))
g["sort forwards", kind] = @benchmarkable sort($list; alg = $Alg)
g["sort reverse", kind] = @benchmarkable sort($list; alg = $Alg, rev = true)
g["sortperm forwards", kind] = @benchmarkable sortperm($list; alg = $Alg)
g["sortperm reverse", kind] = @benchmarkable sortperm($list; alg = $Alg, rev = true)
g["sort! forwards", kind] = @benchmarkable sort!(x; alg = $Alg) setup=(x = copy($list))
g["sort! reverse", kind] = @benchmarkable sort!(x; alg = $Alg, rev = true) setup=(x = copy($list))
g["sortperm! forwards", kind] = @benchmarkable sortperm!(x, $list; alg = $Alg) setup=(x = copy($ix))
g["sortperm! reverse", kind] = @benchmarkable sortperm!(x, $list; alg = $Alg, rev = true) setup=(x = copy($ix))
SUITE = BenchmarkGroup()

#=
Benchmarked:

Various bitwidths (1-128)
Various types (Integer, floating point, char)
Many input orderings
Forward and reverse sorting
Mutating and nonmutating
partialsort
sortperm
lengths 1—6_572_799

Not yet benchmarked:

Nonuniform distributions
Highly uniform distributions
Collections of runs (e.g. ascending saw, descending saw)
Pathological input orders
Non isbits types
By and lt orders
Lengths greater than 6_572_799
=#

# Ways of constructing an interestingly ordered input vector
"""
    ascending(x, len)

Draw `len` elements from `x` without replacement using `sample` with
`ordered=true`, so the drawn elements keep the relative order they have in `x`.
When `x` is already sorted (as the pools in `decks` are), the result is ascending.
"""
function ascending(x, len)
    return sample(x, len; replace=false, ordered=true)
end
"""
    descending(x, len)

Draw `len` elements from `x` without replacement using `sample` with
`ordered=true`, then reverse the drawn vector in place and return it.
When `x` is sorted, the result is therefore in descending order.
"""
function descending(x, len)
    picked = sample(x, len; replace=false, ordered=true)
    return reverse!(picked)
end
"""
    unique(f; n, len)

Return a generator `x -> ...` that first builds a pool `f(x, n)` and then draws
`len` values from that pool with `rand` (i.e. with replacement), so the output
contains at most as many distinct values as the pool does.

Note that this definition is local to the enclosing module and is not a method
of `Base.unique`.
"""
function unique(f; n, len)
    return function (x)
        pool = f(x, n)
        return rand(pool, len)
    end
end
"""
    exchanges(f; n, len)

Return a generator `x -> ...` that builds `f(x, len)` and then performs `n`
random exchanges on it: each exchange picks two indices with `rand` (they may
coincide) and swaps the elements at those positions. The modified vector is
returned.
"""
function exchanges(f; n, len)
    return function (x)
        v = f(x, len)
        idx = eachindex(v)
        for _ in 1:n
            a, b = rand(idx, 2)
            v[a], v[b] = v[b], v[a]
        end
        return v
    end
end
# Return a generator `x -> ...` that builds `f(x, len)` and then randomizes its
# first `n` positions: every index `i` in that prefix is swapped with an index
# drawn by `rand` from `i:lastindex(res)`, so the result remains a permutation
# of `f(x, len)`.
random_prepended(f; n, len) = x -> begin
res = f(x, len)
# `min` clamps the prefix to the vector when `n` exceeds its length.
for i in firstindex(res):min(lastindex(res), firstindex(res) + n - 1)
j = rand(i:lastindex(res))
res[i], res[j] = res[j], res[i]
end
# NOTE(review): the next two lines have no matching `end` and reference `g`,
# which is not defined in this function; they look like removed-side residue
# from the diff rendering rather than part of this definition - confirm
# against the actual file.
for b in values(g)
b.params.time_tolerance = 0.30
res
end
"""
    random_appended(f; n, len)

Return a generator `x -> ...` that builds `f(x, len)` and then randomizes its
last `n` positions: walking backwards from the last index, each position in that
suffix is swapped with an index drawn by `rand` from the first index up to the
current position. The suffix is clamped to the vector when `n` exceeds its
length, and the result remains a permutation of `f(x, len)`.
"""
function random_appended(f; n, len)
    return function (x)
        v = f(x, len)
        head, tail = firstindex(v), lastindex(v)
        stop = max(head, tail - n + 1)
        for pos in tail:-1:stop
            other = rand(head:pos)
            v[pos], v[other] = v[other], v[pos]
        end
        return v
    end
end
"""
    unmodified(f; n, len)

Return a generator `x -> f(x, len)` that applies the root ordering `f` with no
further perturbation. The keyword `n` is accepted only so that this modifier has
the same call shape as the other modifiers; it is not used.
"""
function unmodified(f; n, len)
    return x -> f(x, len)
end

############
# issorted #
############
"""
    make!(suite, len, func, partial_target, rev, source, input_order_root,
          input_order_modifier, n_func)

Build a single `@benchmarkable` that calls `func` on an input vector of length
`len` and store it in `suite` under the key
`(len, rough_len, string(func), input_order_name, rev...)`.

# Arguments
- `suite`: the benchmark group that receives the new benchmark.
- `len`: length of the input vector.
- `func`: the function under test (e.g. `sort`, `sort!`, `partialsortperm!`).
- `partial_target`: a range of positions into `[1, n, len-n+1, len]` selecting the
  target index (or index range) passed to functions whose name starts with
  `partial`; ignored otherwise.
- `rev`: `()` for the default order or `("rev",)` to pass `rev=true`; its contents
  are also appended to the benchmark key.
- `source`: element type or range the input is drawn from; also the key into the
  module-level `decks` table.
- `input_order_root`: base ordering (`rand`, `ascending`, `descending`).
- `input_order_modifier`: perturbation applied on top of the root ordering
  (`unmodified`, `unique`, `exchanges`, `random_prepended`, `random_appended`).
- `n_func`: maps `len` to the modifier parameter `n`; the result is clamped to
  `1:len`.

Inputs in the "large" length bucket ignore the ordering arguments and are always
generated with `rand(source, len)`.

Relies on the module-level tables `lens` and `decks`, which must be defined before
this function is called.
"""
function make!(suite, len, func, partial_target, rev, source, input_order_root, input_order_modifier, n_func)
    # Bucket the length by thirds of the `lens` table.
    rough_len = if len <= lens[length(lens) ÷ 3]
        "small"
    elseif len <= lens[length(lens) * 2 ÷ 3]
        "medium"
    else
        "large"
    end
    kwrev = isempty(rev) ? () : (rev=true,)
    n = min(len, max(1, n_func(len)))
    sfunc = string(func)
    if rough_len == "large"
        input_order_name = "rand"
        setup = :(rand($source, $len))
    else
        # Perturbing an already random input adds nothing, so collapse those
        # combinations onto the plain random case.
        if input_order_root == rand && input_order_modifier ∈ [exchanges, random_prepended, random_appended]
            input_order_modifier = unmodified
        end
        # A pool of one value yields a constant vector regardless of the root
        # ordering, so normalize the root to keep the name canonical.
        if input_order_modifier == unique && n == 1
            input_order_root = rand
        end
        input_order_name = (input_order_modifier == unmodified ? "$input_order_root" : "$input_order_root with $n $input_order_modifier")

        generator = input_order_modifier(input_order_root; n, len)
        deck = decks[source]
        setup = :($generator($deck))
    end

    # `sortperm!` variants take a preallocated index vector as first argument.
    ix = if endswith(sfunc, "sortperm!")
        (randperm(len),)
    else () end

    # `partial*` functions take a target index or index range after the vector.
    partial = if startswith(sfunc, "partial")
        vals = [1, n, len-n+1, len]
        lo, hi = extrema(partial_target)
        ((lo == hi ? vals[lo] : (vals[lo]:vals[hi])),)
    else () end

    expr = :(@benchmarkable $func($ix..., x, $partial...; $kwrev...) setup=(x = $setup))
    # Mutating functions must not be evaluated repeatedly on the same `x`.
    endswith(sfunc, '!') && push!(expr.args, :(evals = 1))

    suite[len, rough_len, sfunc, input_order_name, rev...] = eval(expr)
end

g = addgroup!(SUITE, "issorted")
# Benchmark input lengths: 30 rapidly growing sizes. `.^` is right-associative,
# so entry `i` is `round(Int, 1.303483^(i^1.2))`.
lens = round.(Int, 1.303483 .^ (1:30) .^ 1.2)
# Element sources handed to `rand`: floating-point, integer, `Char` and `Bool`
# types plus several integer ranges of increasing width.
sources = [Float64, Float32, Float16, Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128, 1:10, 1:100, 1:1000, 1:10_000, 1:100_000, Char, Bool]
# One pre-sorted pool ("deck") per source. Each deck holds three times
# `lens[length(lens) * 2 ÷ 3]` elements, the same threshold `make!` uses as the
# upper bound of its "medium" bucket, so every non-"large" length can be sampled
# from it without replacement.
decks = Dict([s => sort!(rand(s, 3lens[length(lens) * 2 ÷ 3])) for s in sources])

for (kind, list) in LISTS
g["forwards", kind] = @benchmarkable issorted($list)
g["reverse", kind] = @benchmarkable issorted($list; rev = true)
# "quick" group: a reduced set of axes for the combinatorial design. `all_pairs`
# (from UnitTestDesign) picks the combinations to benchmark - presumably a set
# covering every pair of axis values; confirm against the UnitTestDesign docs.
# The axes below must stay in the same order as the destructured loop variables.
g = addgroup!(SUITE, "quick")
for (len, func, partial_target, rev, source, input_order_root, n_func) in all_pairs(
# len
lens,
# func
[sort, sort!, sortperm, partialsort],
# partial_target (positions into `[1, n, len-n+1, len]` inside `make!`)
[1:2, 2:2],
# rev
[(), ("rev",)],
# source
[Float64, Float32, Int16, Int64, Int128, UInt8, UInt32, UInt64, 1:10, 1:10_000],
# input_order_root
[rand, ascending],
# n_func
[_->4, len->len÷4])
# The quick group never perturbs the root ordering, hence `unmodified`.
make!(g, len, func, partial_target, rev, source, input_order_root, unmodified, n_func)
end

for b in values(g)
b.params.time_tolerance = 0.30
# "full" group: every function, source, root ordering and modifier. `all_pairs`
# (from UnitTestDesign) picks the combinations to benchmark. The axes below must
# stay in the same order as the destructured loop variables.
g = addgroup!(SUITE, "full")
for (len, func, partial_target, rev, source, input_order_root, input_order_modifier, n_func) in all_pairs(
# len
lens,
# func
[sort, sort!, sortperm, sortperm!, partialsort, partialsort!, partialsortperm, partialsortperm!, issorted],
# partial_target (positions into `[1, n, len-n+1, len]` inside `make!`)
[1:1, 1:2, 1:3, 1:4, 2:2, 2:3, 2:4, 3:3, 3:4, 4:4],
# rev
[(), ("rev",)],
# source
sources,
# input_order_root
[rand, ascending, descending],
# input_order_modifier
[unmodified, unique, exchanges, random_prepended, random_appended],
# n_func (the result is clamped to 1:len inside `make!`)
[_->1, _->4, _->20, len->len÷20, len->len÷4, identity])
make!(g, len, func, partial_target, rev, source, input_order_root, input_order_modifier, n_func)
end

end # module