From 1afc4bd4daa81b1b986833c205260d2ccefa2fae Mon Sep 17 00:00:00 2001
From: Willow Ahrens
Date: Tue, 24 Dec 2024 14:11:13 -0500
Subject: [PATCH 1/2] add parameterization

Factor the per-sample measurement out of the generated sample function into
`prehook`/`posthook`, which dispatch on the parameter type. The sample
function now takes the evaluation count as its first argument; every caller
is updated in this patch so the tree keeps working at this commit.
---
Review notes (ignored by `git am`):
- `prehook` now returns `(gc_start, start_time)`, which is what `posthook`
  destructures.
- The sample function calls `posthook` (not `__posthook`), reaches both hooks
  through `$BenchmarkTools`, and returns `(__result..., __return_val)` again.
- `posthook` no longer returns the out-of-scope `return_val`.
- The post-image blob id on the `index` line of src/execution.jl is stale after
  these edits; regenerate the series with `git format-patch` before sending.

 src/LinuxPerf.jl |  3 +++
 src/execution.jl | 78 ++++++++++++++++++++++++++++++++++----------------
 2 files changed, 56 insertions(+), 25 deletions(-)
 create mode 100644 src/LinuxPerf.jl

diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl
new file mode 100644
index 00000000..d65b613b
--- /dev/null
+++ b/src/LinuxPerf.jl
@@ -0,0 +1,3 @@
+struct LinuxPerfParameters
+
+end
\ No newline at end of file
diff --git a/src/execution.jl b/src/execution.jl
index 295ca608..c5998543 100644
--- a/src/execution.jl
+++ b/src/execution.jl
@@ -110,18 +110,18 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, k
     params = Parameters(p; kwargs...)
     @assert params.seconds > 0.0 "time limit must be greater than 0.0"
     if warmup
-        b.samplefunc(b.quote_vals, Parameters(params; evals=1)) #warmup sample
+        b.samplefunc(1, b.quote_vals, Parameters(params; evals=1)) #warmup sample
     end
     trial = Trial(params)
     params.gctrial && gcscrub()
     start_time = Base.time()
-    s = b.samplefunc(b.quote_vals, params)
+    s = b.samplefunc(params.evals, b.quote_vals, params)
     push!(trial, s[1:(end - 1)]...)
     return_val = s[end]
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
-        push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
+        push!(trial, b.samplefunc(params.evals, b.quote_vals, params)[1:(end - 1)]...)
         iters += 1
     end
     return trial, return_val
@@ -183,13 +183,13 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
     estimates = zeros(maxevals)
     completed = 0
     params.evals = 1
-    b.samplefunc(b.quote_vals, params) #warmup sample
+    b.samplefunc(params.evals, b.quote_vals, params) #warmup sample
     params.gctrial && gcscrub()
     start_time = time()
     for evals in eachindex(estimates)
         params.gcsample && gcscrub()
         params.evals = evals
-        estimates[evals] = first(b.samplefunc(b.quote_vals, params))
+        estimates[evals] = first(b.samplefunc(params.evals, b.quote_vals, params))
         completed += 1
         ((time() - start_time) > params.seconds) && break
     end
@@ -560,38 +560,66 @@ function generate_benchmark_definition(
                 $(core_body)
             end
             @noinline function $(samplefunc)(
-                $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
-            )
+                __evals,
+                $(Expr(:tuple, quote_vars...)), __params::__P
+            ) where {__P}
                 $(setup)
-                __evals = __params.evals
-                __gc_start = Base.gc_num()
-                __start_time = time_ns()
+                __state = $BenchmarkTools.prehook(__evals, __params)
                 __return_val = $(invocation)
                 for __iter in 2:__evals
                     $(invocation)
                 end
-                __sample_time = time_ns() - __start_time
-                __gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
+                __result = $BenchmarkTools.posthook(__state, __evals, __params)
                 $(teardown)
-                __time = max((__sample_time / __evals) - __params.overhead, 0.001)
-                __gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
-                __memory = Int(Base.fld(__gcdiff.allocd, __evals))
-                __allocs = Int(
-                    Base.fld(
-                        __gcdiff.malloc +
-                        __gcdiff.realloc +
-                        __gcdiff.poolalloc +
-                        __gcdiff.bigalloc,
-                        __evals,
-                    ),
-                )
-                return __time, __gctime, __memory, __allocs, __return_val
+                # Hooks return the measurements only; the sample function
+                # appends the benchmarked expression's value as the last element.
+                return (__result..., __return_val)
             end
             $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
         end,
     )
 end
 
+"""
+    prehook(evals, params)
+
+Capture the measurement state immediately before a sample's evaluations run.
+
+The returned state is passed unchanged to `posthook`. Methods for other
+parameter types can extend both hooks to collect additional measurements.
+"""
+function prehook(evals, params::Parameters)
+    gc_start = Base.gc_num()
+    start_time = time_ns()
+    return gc_start, start_time
+end
+
+"""
+    posthook(state, evals, params)
+
+Turn the `state` captured by `prehook` into per-evaluation measurements.
+
+For `Parameters` this returns `(time, gctime, memory, allocs)`; the sample
+function appends the benchmarked expression's value itself.
+"""
+function posthook((gc_start, start_time), evals, params::Parameters)
+    sample_time = time_ns() - start_time
+    gcdiff = Base.GC_Diff(Base.gc_num(), gc_start)
+    time = max((sample_time / evals) - params.overhead, 0.001)
+    gctime = max((gcdiff.total_time / evals) - params.overhead, 0.0)
+    memory = Int(Base.fld(gcdiff.allocd, evals))
+    allocs = Int(
+        Base.fld(
+            gcdiff.malloc +
+            gcdiff.realloc +
+            gcdiff.poolalloc +
+            gcdiff.bigalloc,
+            evals,
+        ),
+    )
+    return time, gctime, memory, allocs
+end
+
 ######################
 # convenience macros #
 ######################

From 126a1a7388327cbce64253a311a5362bfbf420af Mon Sep 17 00:00:00 2001
From: Willow Ahrens
Date: Thu, 26 Dec 2024 15:54:54 -0500
Subject: [PATCH 2/2] move linuxperf into an extension

Define `LinuxPerfParameters` and its `prehook`/`posthook` methods in a
package extension and remove the placeholder `src/LinuxPerf.jl`.
---
Review notes (ignored by `git am`):
- The module now has its closing `end` and imports `BenchmarkTools`.
- `posthook` closes the bench on every path and returns
  `(instructions, branches, result)`; previously both branches returned early,
  so `close(bench)` and the final `return` (which named undefined variables)
  were unreachable and the timing result was dropped.
- The sample-function call-site updates moved into patch 1; the hunk that only
  added a second blank line after `posthook` is dropped.
- This series does not touch Project.toml; the extension must be registered
  under `[weakdeps]`/`[extensions]` there if that has not been done already.
- The post-image blob id on the `index` line of ext/LinuxPerfExt.jl is stale
  after these edits; regenerate the series with `git format-patch`.

 ext/LinuxPerfExt.jl | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/LinuxPerf.jl    |  3 ---
 2 files changed, 57 insertions(+), 3 deletions(-)
 create mode 100644 ext/LinuxPerfExt.jl
 delete mode 100644 src/LinuxPerf.jl

diff --git a/ext/LinuxPerfExt.jl b/ext/LinuxPerfExt.jl
new file mode 100644
index 00000000..f7bad795
--- /dev/null
+++ b/ext/LinuxPerfExt.jl
@@ -0,0 +1,57 @@
+module LinuxPerfExt
+
+import BenchmarkTools
+import LinuxPerf: LinuxPerf, PerfBench, EventGroup, EventType
+import LinuxPerf: enable!, disable!, enable_all!, disable_all!, close, read!
+
+export LinuxPerfParameters
+
+"""
+    LinuxPerfParameters(; g, params)
+
+Benchmark parameters that add hardware-counter sampling to the regular timing run.
+
+`g` is the perf event group to read (instructions and branches by default) and
+`params` holds the wrapped `BenchmarkTools.Parameters`.
+"""
+Base.@kwdef struct LinuxPerfParameters
+    g = EventGroup([EventType(:hw, :instructions), EventType(:hw, :branches)])
+    params = BenchmarkTools.Parameters()
+end
+
+# Start the wrapped timing measurement, then enable the perf counters.
+function BenchmarkTools.prehook(evals, params::LinuxPerfParameters)
+    state = BenchmarkTools.prehook(evals, params.params)
+    bench = PerfBench(0, params.g)
+    enable!(bench)
+    return (state, bench)
+end
+
+# Stop the counters, finish the wrapped measurement, and return
+# `(instructions, branches, result)`; both counts are `NaN` when the counters
+# were running for at most half of the time they were enabled.
+function BenchmarkTools.posthook((state, bench), evals, params::LinuxPerfParameters)
+    disable!(bench)
+    result = BenchmarkTools.posthook(state, evals, params.params)
+    # NOTE(review): assumes `bench.groups` exposes `leader_io` directly; confirm
+    # against the LinuxPerf.jl `PerfBench`/`EventGroup` definitions.
+    (N, time_enabled, time_running, insts, branches) = read!(
+        bench.groups.leader_io, Vector{UInt64}(undef, 5)
+    )
+    # Close before returning so the bench is released on every path.
+    close(bench)
+    if 2 * time_running <= time_enabled
+        # enabled less than 50% of the time
+        # (most likely due to PMU contention with other perf events)
+        sample_instructions = NaN
+        sample_branches = NaN
+    else
+        # account for partially-active measurement
+        k = time_enabled / time_running
+        sample_instructions = Float64(insts) * k
+        sample_branches = Float64(branches) * k
+    end
+    return (sample_instructions, sample_branches, result)
+end
+
+end # module LinuxPerfExt
diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl
deleted file mode 100644
index d65b613b..00000000
--- a/src/LinuxPerf.jl
+++ /dev/null
@@ -1,3 +0,0 @@
-struct LinuxPerfParameters
-
-end
\ No newline at end of file