WIP: linuxperf and other experiments #388

Draft · wants to merge 2 commits into main
Changes from all commits
39 changes: 39 additions & 0 deletions ext/LinuxPerfExt.jl
@@ -0,0 +1,39 @@
module LinuxPerfExt

import BenchmarkTools
import LinuxPerf: LinuxPerf, PerfBench, EventGroup, EventType
import LinuxPerf: enable!, disable!, enable_all!, disable_all!, close, read!

export LinuxPerfParameters

Base.@kwdef struct LinuxPerfParameters
    g = EventGroup([EventType(:hw, :instructions), EventType(:hw, :branches)])
    params = BenchmarkTools.Parameters()
end

function BenchmarkTools.prehook(evals, params::LinuxPerfParameters)
    state = BenchmarkTools.prehook(evals, params.params)
    bench = PerfBench(0, params.g)
    enable!(bench)
    (state, bench)
end

function BenchmarkTools.posthook((state, bench), evals, params::LinuxPerfParameters)
    disable!(bench)
    result = BenchmarkTools.posthook(state, evals, params.params)
    (N, time_enabled, time_running, insts, branches) = read!(
        bench.groups.leader_io, Vector{UInt64}(undef, 5)
    )
    close(bench)
    if 2 * time_running <= time_enabled
        # enabled less than 50% of the time
        # (most likely due to PMU contention with other perf events)
        estimated_instructions = NaN
        estimated_branches = NaN
    else
        # account for partially-active measurement
        k = time_enabled / time_running
        estimated_instructions = Float64(insts) * k
        estimated_branches = Float64(branches) * k
    end
    return (estimated_instructions, estimated_branches, result)
end

end # module LinuxPerfExt
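
For context, a rough sketch of how these hooks could be exercised directly once both packages are loaded. This assumes the extension is wired up in Project.toml; the Base.get_extension lookup, the literal evals count, and the standalone call pattern are illustrative, not a final API:

using BenchmarkTools, LinuxPerf   # loading both is what would activate LinuxPerfExt

# Hypothetical direct use of the hook protocol, bypassing `run`:
ext = Base.get_extension(BenchmarkTools, :LinuxPerfExt)
perf_params = ext.LinuxPerfParameters()   # defaults to hw instructions + branches

evals = 100
state = BenchmarkTools.prehook(evals, perf_params)   # starts timing, enables the perf event group
# ... the benchmark body would be evaluated `evals` times here ...
insts, branches, timings = BenchmarkTools.posthook(state, evals, perf_params)
# `timings` is the default posthook's (time, gctime, memory, allocs) tuple;
# `insts`/`branches` come back as NaN when the event group ran <50% of the time.
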
59 changes: 34 additions & 25 deletions src/execution.jl
@@ -110,18 +110,18 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, k
params = Parameters(p; kwargs...)
@assert params.seconds > 0.0 "time limit must be greater than 0.0"
if warmup
b.samplefunc(b.quote_vals, Parameters(params; evals=1)) #warmup sample
b.samplefunc(1, b.quote_vals, Parameters(params; evals=1)) #warmup sample
end
trial = Trial(params)
params.gctrial && gcscrub()
start_time = Base.time()
s = b.samplefunc(b.quote_vals, params)
s = b.samplefunc(params.evals, b.quote_vals, params)
push!(trial, s[1:(end - 1)]...)
return_val = s[end]
iters = 2
while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
params.gcsample && gcscrub()
push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
push!(trial, b.samplefunc(params.evals, b.quote_vals, params)[1:(end - 1)]...)
iters += 1
end
return trial, return_val
@@ -183,13 +183,13 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
estimates = zeros(maxevals)
completed = 0
params.evals = 1
b.samplefunc(b.quote_vals, params) #warmup sample
b.samplefunc(params.evals, b.quote_vals, params) #warmup sample
params.gctrial && gcscrub()
start_time = time()
for evals in eachindex(estimates)
params.gcsample && gcscrub()
params.evals = evals
estimates[evals] = first(b.samplefunc(b.quote_vals, params))
estimates[evals] = first(b.samplefunc(params.evals, b.quote_vals, params))
completed += 1
((time() - start_time) > params.seconds) && break
end
@@ -560,38 +560,47 @@ function generate_benchmark_definition(
$(core_body)
end
@noinline function $(samplefunc)(
$(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
)
__evals,
$(Expr(:tuple, quote_vars...)), __params::__P
) where {__P}
$(setup)
__evals = __params.evals
__gc_start = Base.gc_num()
__start_time = time_ns()
__state = BenchmarkTools.prehook(__evals, __params)
__return_val = $(invocation)
for __iter in 2:__evals
$(invocation)
end
__sample_time = time_ns() - __start_time
__gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
__result = BenchmarkTools.posthook(__state, __evals, __params)
$(teardown)
__time = max((__sample_time / __evals) - __params.overhead, 0.001)
__gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
__memory = Int(Base.fld(__gcdiff.allocd, __evals))
__allocs = Int(
Base.fld(
__gcdiff.malloc +
__gcdiff.realloc +
__gcdiff.poolalloc +
__gcdiff.bigalloc,
__evals,
),
)
return __time, __gctime, __memory, __allocs, __return_val
end
$BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
end,
)
end

function prehook(evals, params::Parameters)
    gc_start = Base.gc_num()
    start_time = time_ns()
    return (gc_start, start_time)
end

function posthook((gc_start, start_time), evals, params::Parameters)
    sample_time = time_ns() - start_time
    gcdiff = Base.GC_Diff(Base.gc_num(), gc_start)
    time = max((sample_time / evals) - params.overhead, 0.001)
    gctime = max((gcdiff.total_time / evals) - params.overhead, 0.0)
    memory = Int(Base.fld(gcdiff.allocd, evals))
    allocs = Int(
        Base.fld(
            gcdiff.malloc +
            gcdiff.realloc +
            gcdiff.poolalloc +
            gcdiff.bigalloc,
            evals,
        ),
    )
    return time, gctime, memory, allocs
end


######################
# convenience macros #
######################
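
For comparison, a minimal sketch of the default hook pair in isolation, mirroring what the generated sample function above does with the hooks added in this diff (the `work` function and the evals count are stand-ins for illustration):

using BenchmarkTools

work() = sum(rand(1000))   # illustrative workload, not part of the PR

params = BenchmarkTools.Parameters()
params.evals = 1000

state = BenchmarkTools.prehook(params.evals, params)   # captures gc_num() and time_ns()
for _ in 1:params.evals
    work()
end
t, gct, mem, allocs = BenchmarkTools.posthook(state, params.evals, params)
# t / gct are per-evaluation nanoseconds, overhead-corrected and clamped;
# mem / allocs are per-evaluation byte and allocation counts.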