Skip to content

Commit 5ea6f96

Browse files
committed
Add bootstrap sampling, warmup, and outliers
1 parent 7a4f422 commit 5ea6f96

File tree

3 files changed

+274
-31
lines changed

3 files changed

+274
-31
lines changed

Benchmarks/Main.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def fibBench := (bgroup "fib" [
1010
bench "fib 1" fib 1,
1111
bench "fib 2" fib 2,
1212
bench "fib 30" fib 30
13-
] { numRuns := 100 } )
13+
] { numSamples := 100, samplingMode := .linear } )
1414

1515
def addBench := (bgroup "add" [
1616
bench "add 1 2" (Nat.add 1) 2

Ix/Benchmark.lean

Lines changed: 266 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@ import Batteries
99

1010
open Batteries (RBMap)
1111

12-
-- Get the average Cronos time in nanoseconds
13-
def avgTime (c : Cronos) : Float :=
14-
let timings := c.data.valuesList
15-
Float.ofNat timings.sum / Float.ofNat timings.length
12+
/--
13+
Benchmarking library modeled after Criterion in Haskell and Rust
14+
15+
Limitations:
16+
- Measures time in nanoseconds using `IO.monoNanosNow`, which is less precise than the picoseconds used in Criterion.rs
17+
-/
1618

1719
-- This prevents inline optimization for benchmarking, but doesn't work if the result is never used
1820
--@[never_extract, noinline]
@@ -23,6 +25,23 @@ def avgTime (c : Cronos) : Float :=
2325
def blackBoxIO (f : α → β) (a : α) : IO β := do
2426
pure $ f a
2527

28+
def Float.toNanos (f : Float) : Float := f * 10 ^ 9
29+
30+
def Float.toSeconds (f : Float) : Float := f / 10 ^ 9
31+
32+
def Nat.toSeconds (n : Nat) : Float := Float.ofNat n / 10 ^ 9
33+
34+
-- TODO: Maybe add tenths place before M and B
35+
def Nat.natPretty (n : Nat) : String :=
36+
if n ≥ 10 ^ 9
37+
then
38+
toString (n / 10 ^ 9) ++ "B"
39+
else if n ≥ 10 ^ 6
40+
then
41+
toString (n / 10 ^ 6 ) ++ "M"
42+
else
43+
toString n
44+
2645
def putStrNat (tup : String × Nat) : Ixon.PutM := do
2746
Ixon.putExpr $ .strl tup.fst
2847
Ixon.putExpr $ .natl tup.snd
@@ -96,8 +115,17 @@ instance : Ixon.Serialize BenchData where
96115
--def myBD : BenchData := { mean := 100, stdDev := 1.0 }
97116
--#eval (Ixon.Serialize.get (Ixon.Serialize.put myBD) : Except String BenchData)
98117

118+
inductive SamplingMode where
119+
| flat : SamplingMode
120+
| linear : SamplingMode
121+
deriving Repr
122+
99123
structure Config where
100-
numRuns : Nat := 1000
124+
warmupTime : Float := 3.0
125+
sampleTime : Float := 5.0
126+
numSamples : Nat := 100
127+
samplingMode : SamplingMode := .flat
128+
bootstrapSamples : Nat := 100000
101129
noiseThreshold : Float := 0.02
102130
deriving Repr
103131

@@ -113,20 +141,40 @@ def Float.variablePrecision (float : Float) (precision : Nat) : Float :=
113141
-- Move the decimal right to the desired precision, round to the nearest integer, then move the decimal back
114142
(float * moveDecimal).round / moveDecimal
115143

116-
--#eval Float.variablePrecision 10.12345 4
117-
144+
-- Panics if float is a NaN
118145
def Float.floatPretty (float : Float) (precision : Nat): String :=
119146
let precise := float.variablePrecision precision
120-
precise.toString.dropRightWhile (· == '0')
121-
122-
--#eval Float.floatPretty 10.12345 4
123-
124-
-- TODO: Pretty-print units based on nearest precision (nano, micro, etc)
125-
--def convertUnits
147+
let parts := precise.toString.splitOn "."
148+
let integer := parts[0]!
149+
let fractional := parts[1]!.take precision
150+
if !fractional.isEmpty
151+
then integer ++ "." ++ fractional
152+
else integer
153+
154+
-- Formats a time in ns as an xx.yy string with correct units
155+
def Float.formatNanos (f : Float) : String :=
156+
if f ≥ 10 ^ 9
157+
then
158+
(f / 10 ^ 9).floatPretty 2 ++ "s"
159+
else if f ≥ 10 ^ 6
160+
then
161+
(f / 10 ^ 6).floatPretty 2 ++ "ms"
162+
else if f ≥ 10 ^ 3
163+
then
164+
(f / 10 ^ 3).floatPretty 2 ++ "µs"
165+
else
166+
f.floatPretty 2 ++ "ns"
126167

127168
def BenchGroup.analyzeStats (bg : BenchGroup) (baseData : BenchData) (newData : BenchData) : IO BenchData := do
128-
let change := percentChange baseData.mean newData.mean
129-
IO.println s!"Percent change: {change.floatPretty 2}%"
169+
--IO.println s!"Data to compare: {baseData.mean} {newData.mean}"
170+
let change ← if baseData.mean == 0
171+
then
172+
IO.println s!"Percent change is infinite, `baseData` mean is 0"
173+
pure 0.0
174+
else
175+
let change := percentChange baseData.mean newData.mean
176+
IO.println s!"Percent change: {change.floatPretty 2}%"
177+
pure change
130178
let nT := bg.config.noiseThreshold * 100
131179
--IO.println s!"Noise threshold: {nT}%"
132180
if change.abs > nT
@@ -181,27 +229,215 @@ structure Benchmarkable (α β : Type) where
181229

182230
def bench (name : String) (func : α → β) (arg : α) : Benchmarkable α β := ⟨ name, func, arg ⟩
183231

184-
def BenchGroup.runBench (bg : BenchGroup) (bench : Benchmarkable α β) : IO Unit := do
185-
let mut cron := Cronos.new
186-
for run in List.range bg.config.numRuns do
187-
let benchName := s!"{bench.name} run {run}"
188-
--IO.println benchName
189-
cron ← cron.clock benchName
232+
def BenchGroup.warmup (bg : BenchGroup) (bench : Benchmarkable α β) : IO Float := do
233+
IO.println s!"Warming up for {bg.config.warmupTime.floatPretty 2}s"
234+
let mut count := 0
235+
let warmupNanos := Cronos.secToNano bg.config.warmupTime
236+
let mut elapsed := 0
237+
let startTime ← IO.monoNanosNow
238+
while elapsed < warmupNanos do
190239
let _res ← blackBoxIO bench.func bench.arg
191-
cron ← cron.clock benchName
192-
let results : BenchData := { mean := avgTime cron, stdDev := Float.ofNat 1 }
193-
IO.println s!"{bench.name} avg: {results.mean}ns"
194-
bg.storeBench bench.name results
240+
count := count + 1
241+
let now ← IO.monoNanosNow
242+
elapsed := now - startTime
243+
let mean := Float.ofNat elapsed / Float.ofNat count
244+
--IO.println s!"{bench.name} warmup avg: {mean}ns"
245+
--IO.println s!"Ran {count} iterations in {bg.config.warmupTime.floatPretty 2}s\n"
246+
return mean
247+
248+
-- TODO: Combine with other sampling functions, DRY
249+
-- TODO: Recommend sample count if expectedTime >>> bg.config.sampleTime (i.e. itersPerSample == 1)
250+
def BenchGroup.sampleFlat (bg : BenchGroup) (bench : Benchmarkable α β)
251+
(warmupMean : Float) : IO (List Float) := do
252+
let targetTime := bg.config.sampleTime.toNanos
253+
let timePerSample := targetTime / (Float.ofNat bg.config.numSamples)
254+
let itersPerSample := (timePerSample / warmupMean).ceil.toUInt64.toNat.max 1
255+
let totalIters := itersPerSample * bg.config.numSamples
256+
let expectedTime := warmupMean * Float.ofNat itersPerSample * bg.config.numSamples.toSeconds
257+
if itersPerSample == 1
258+
then
259+
IO.eprintln s!"Warning: Unable to complete {bg.config.numSamples} samples in {bg.config.sampleTime.floatPretty 2}s. You may wish to increase target time to {expectedTime.floatPretty 2}s"
260+
IO.println s!"Running {bg.config.numSamples} samples in {expectedTime.floatPretty 2}s ({totalIters} iterations)"
261+
--IO.println s!"Flat sample. Iters per sample: {itersPerSample}"
262+
let mut timings : List Float := []
263+
for _s in List.range bg.config.numSamples do
264+
--IO.println s!"Running sample {s}"
265+
let mut sum := 0
266+
for _ in List.range itersPerSample do
267+
let start ← IO.monoNanosNow
268+
let _res ← blackBoxIO bench.func bench.arg
269+
let finish ← IO.monoNanosNow
270+
sum := sum + (finish - start)
271+
let mean := Float.ofNat sum / Float.ofNat itersPerSample
272+
--IO.println s!"{bench.name} run {s} avg: {mean}ns"
273+
timings := mean :: timings
274+
return timings.reverse
275+
276+
-- Runs the samples as a sequence of linearly increasing iterations [d, 2d, 3d, ..., Nd]
277+
-- where `N` is the total number of samples and `d` is a factor derived from the warmup iteration time. The sum of this series should be roughly equivalent to the total `sampleTime`
278+
def BenchGroup.sampleLinear (bg : BenchGroup) (bench : Benchmarkable α β) (warmupMean : Float) : IO (List Float) := do
279+
-- `d` has a minimum value of 1 if the `warmupMean` is sufficiently large
280+
-- In this case,
281+
let totalRuns := bg.config.numSamples * (bg.config.numSamples + 1) / 2
282+
let targetTime := bg.config.sampleTime.toNanos
283+
let d := (targetTime / warmupMean / (Float.ofNat totalRuns)).ceil.toUInt64.toNat.max 1
284+
let expectedTime := (Float.ofNat totalRuns) * (Float.ofNat d) * warmupMean.toSeconds
285+
let totalIters := (List.range bg.config.numSamples).map (fun x => (x + 1) * d)
286+
if d == 1
287+
then
288+
IO.eprintln s!"Warning: Unable to complete {bg.config.numSamples} samples in {bg.config.sampleTime.floatPretty 2}s. You may wish to increase target time to {expectedTime.floatPretty 2}s"
289+
IO.println s!"Running {bg.config.numSamples} samples in {expectedTime.floatPretty 2}s ({totalIters.sum.natPretty} iterations)"
290+
--IO.println s!"Linear sample. Iters increase by a factor of {d} per sample"
291+
let mut timings : List Float := []
292+
for iters in totalIters do
293+
--IO.println s!"Sample {s} with {iters} iterations"
294+
let mut sum := 0
295+
for _i in List.range iters do
296+
let start ← IO.monoNanosNow
297+
let _res ← blackBoxIO bench.func bench.arg
298+
let finish ← IO.monoNanosNow
299+
sum := sum + (finish - start)
300+
let mean := Float.ofNat sum / Float.ofNat iters
301+
--IO.println s!"Avg: {mean}ns"
302+
timings := mean :: timings
303+
return timings.reverse
304+
305+
def BenchGroup.sample (bg : BenchGroup) (bench : Benchmarkable α β) (warmupMean : Float) : IO (List Float) := do
306+
match bg.config.samplingMode with
307+
| .flat => bg.sampleFlat bench warmupMean
308+
| .linear => bg.sampleLinear bench warmupMean
309+
310+
def percentile? (data : List Float) (p : Float): Option Float :=
311+
if data.isEmpty || p > 100
312+
then .none
313+
else
314+
let data := data.sortBy (fun x y => compareOfLessAndBEq x y)
315+
let r := (p / 100) * (Float.ofNat data.length - 1) + 1
316+
let rf := r - r.floor
317+
if rf == 0
318+
then
319+
data[r.toUInt64.toNat - 1]?
320+
else
321+
let ri := r.floor.toUInt64.toNat
322+
-- TODO: use fallible ? indices and monadically chain state
323+
.some $ data[ri-1]! + (rf) * (data[ri]! - data[ri-1]!)
324+
325+
-- Outliers are classified following https://bheisler.github.io/criterion.rs/book/analysis.html#outlier-classification
326+
structure Outliers where
327+
outliers : List Float
328+
highSevere : Nat
329+
highMild : Nat
330+
lowMild : Nat
331+
lowSevere : Nat
332+
deriving Repr
333+
334+
def Outliers.getTotal (o : Outliers) : Nat :=
335+
o.highSevere + o.highMild + o.lowMild + o.lowSevere
336+
337+
-- TODO: Clean up and return the list for plotting
338+
def tukey (data : List Float) : IO Unit := do
339+
let upper := (percentile? data 75).get!
340+
let lower := (percentile? data 25).get!
341+
let iqr := upper - lower
342+
let fences := [lower - 3 * iqr, lower - 1.5 * iqr, upper + 1.5 * iqr, upper + 3 * iqr]
343+
let mut out : Outliers := ⟨ [], 0, 0, 0, 0
344+
for elem in data do
345+
if elem < fences[1]!
346+
then
347+
if elem < fences[0]! then
348+
out := { out with outliers := elem :: out.outliers, lowSevere := out.lowSevere + 1 }
349+
else
350+
out := { out with outliers := elem :: out.outliers, lowMild := out.lowMild + 1 }
351+
else if elem > fences[2]! then
352+
if elem > fences[3]! then
353+
out := { out with outliers := elem :: out.outliers, highSevere := out.highSevere + 1 }
354+
else
355+
out := { out with outliers := elem :: out.outliers, highMild := out.highMild + 1 }
356+
let outLength := out.outliers.length
357+
if outLength > 0 then
358+
let samples := data.length
359+
IO.println s!"Found {outLength} outliers among {samples} measurements"
360+
if out.lowMild > 0 then
361+
let pct := Float.ofNat out.lowMild / (Float.ofNat samples) * 100
362+
IO.println s!" {out.lowMild} ({pct.floatPretty 2}%) low mild"
363+
if out.lowSevere > 0 then
364+
let pct := Float.ofNat out.lowSevere / (Float.ofNat samples) * 100
365+
IO.println s!" {out.lowSevere} ({pct.floatPretty 2}%) low severe"
366+
if out.highMild > 0 then
367+
let pct := Float.ofNat out.highMild / (Float.ofNat samples) * 100
368+
IO.println s!" {out.highMild} ({pct.floatPretty 2}%) high mild"
369+
if out.highSevere > 0 then
370+
let pct := Float.ofNat out.highSevere / (Float.ofNat samples) * 100
371+
IO.println s!" {out.highSevere} ({pct.floatPretty 2}%) high severe"
372+
373+
--#eval tukey [1,2,3,4,9999999999]
374+
375+
structure RunningStats where
376+
count : Nat
377+
mean : Float
378+
m2 : Float
379+
380+
-- Reference impl https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
381+
def RunningStats.update (stt: RunningStats) (newValue : Float) : RunningStats :=
382+
let count := stt.count + 1
383+
let delta := newValue - stt.mean
384+
let mean := stt.mean + delta / Float.ofNat count
385+
let delta' := newValue - mean
386+
let m2 := stt.m2 + delta * delta'
387+
{ count, mean, m2 }
388+
389+
def RunningStats.finalize (stt: RunningStats) : BenchData :=
390+
if stt.count < 2
391+
then { mean := 0, stdDev := 0 }
392+
else
393+
{ mean := stt.mean, stdDev := (stt.m2 / (Float.ofNat stt.count - 1)).sqrt }
394+
395+
-- TODO: Rewrite with monads as an exercise
396+
def pickRandom (xs : List Float) (gen : StdGen) : (Float × StdGen) :=
397+
let (res, gen') := randNat gen 0 (xs.length - 1)
398+
(xs[res]!, gen')
399+
400+
def resample (xs : List Float) (gen : StdGen) (sampleSize : Nat) : (List Float × StdGen) :=
401+
go xs gen sampleSize []
402+
where
403+
go xs gen n ys :=
404+
match n with
405+
| 0 => (ys.reverse, gen)
406+
| n + 1 =>
407+
let (res, gen') := pickRandom xs gen
408+
go xs gen' n (res :: ys)
409+
410+
-- TODO: Rewrite as pure function with state monad
411+
def bootstrap (xs : List Float) (gen : StdGen) (numSamples : Nat) : IO BenchData := do
412+
if numSamples < 2
413+
then
414+
IO.eprintln "Error: need at least 2 samples to perform analysis"
415+
let mut gen' := gen
416+
let mut stt : RunningStats := { count := 0, mean := 0, m2 := 0 }
417+
for _s in List.range numSamples do
418+
let (res, gen'') := resample xs gen' xs.length
419+
--IO.println res
420+
let mean := res.sum / Float.ofNat res.length
421+
stt := stt.update mean
422+
gen' := gen''
423+
return stt.finalize
424+
425+
--#eval bootstrap ((List.range 100).map (Float.ofNat)) mkStdGen 10000
195426

196-
-- TODO: Add warm-up time to prevent slow first runs
197427
-- TODO: Make sure compiler isn't caching partial evaluation result for future runs of the same function (measure first vs subsequent runs)
198428
-- A benchmark group defines a collection of related benchmarks that share a configuration, such as number of runs and noise threshold
199429
def bgroup {α β : Type} (name: String) (benches : List (Benchmarkable α β)) (config : Config := {}) : IO Unit := do
200-
let benchGroup : BenchGroup := { name, config }
430+
let bg : BenchGroup := { name, config }
201431
--IO.println s!"My config: {repr config}"
202432
IO.println s!"Running bench group {name}\n"
203433
for b in benches do
204-
let _results ← benchGroup.runBench b
434+
let warmupMean ← bg.warmup b
435+
IO.println s!"Running {b.name}"
436+
let timings ← bg.sample b warmupMean
437+
tukey timings
438+
-- TODO: Add proper randomness with OsRng
439+
let bd ← bootstrap timings mkStdGen bg.config.bootstrapSamples
440+
IO.println s!"Mean: {bd.mean.formatNanos}"
441+
IO.println s!"Standard deviation: {bd.stdDev}"
442+
bg.storeBench b.name bd
205443
IO.println ""
206-
207-
--#eval Float.ofBits (Float.ofNat 1000).toBits

Ix/Cronos.lean

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,17 @@ def clock (c : Cronos) (tag : String) : IO Cronos := do
2323
def nanoToSec (nanos : Nat) : Float :=
2424
Float.ofNat nanos / 1000000000
2525

26+
def secToNano (s : Float) : Nat :=
27+
s.toUInt64.toNat * 1000000000
28+
2629
def summary (c : Cronos) : String :=
2730
let timings := c.data.foldl (init := "")
2831
fun acc tag time => s!"{acc}\n {tag} | {nanoToSec time}s"
2932
s!"Timings:{timings}"
3033

34+
-- Get the average time in nanoseconds, returns NaN if no `data` entries
35+
def mean (c : Cronos) : Float :=
36+
let timings := c.data.valuesList
37+
Float.ofNat timings.sum / Float.ofNat timings.length
3138

3239
end Cronos

0 commit comments

Comments
 (0)