@@ -9,10 +9,12 @@ import Batteries
9
9
10
10
open Batteries (RBMap)
11
11
12
- -- Get the average Cronos time in nanoseconds
13
- def avgTime (c : Cronos) : Float :=
14
- let timings := c.data.valuesList
15
- Float.ofNat timings.sum / Float.ofNat timings.length
12
+ /--
13
+ Benchmarking library modeled after Criterion in Haskell and Rust
14
+
15
+ Limitations:
16
+ - Measures time in nanoseconds using `IO.monoNanosNow`, which is less precise than the picoseconds used in Criterion.rs
17
+ -/
16
18
17
19
-- This prevents inline optimization for benchmarking, but doesn't work if the result is never used
18
20
--@[never_extract, noinline]
@@ -23,6 +25,23 @@ def avgTime (c : Cronos) : Float :=
23
25
def blackBoxIO (f : α → β) (a : α) : IO β := do
24
26
pure $ f a
25
27
28
+ def Float.toNanos (f : Float) : Float := f * 10 ^ 9
29
+
30
+ def Float.toSeconds (f : Float) : Float := f / 10 ^ 9
31
+
32
+ def Nat.toSeconds (n : Nat) : Float := Float.ofNat n / 10 ^ 9
33
+
34
+ -- TODO: Maybe add tenths place before M and B
35
+ def Nat.natPretty (n : Nat) : String :=
36
+ if n ≥ 10 ^ 9
37
+ then
38
+ toString (n / 10 ^ 9 ) ++ "B"
39
+ else if n ≥ 10 ^ 6
40
+ then
41
+ toString (n / 10 ^ 6 ) ++ "M"
42
+ else
43
+ toString n
44
+
26
45
def putStrNat (tup : String × Nat) : Ixon.PutM := do
27
46
Ixon.putExpr $ .strl tup.fst
28
47
Ixon.putExpr $ .natl tup.snd
@@ -96,8 +115,17 @@ instance : Ixon.Serialize BenchData where
96
115
--def myBD : BenchData := { mean := 100, stdDev := 1.0 }
97
116
--#eval (Ixon.Serialize.get (Ixon.Serialize.put myBD) : Except String BenchData)
98
117
118
+ inductive SamplingMode where
119
+ | flat : SamplingMode
120
+ | linear : SamplingMode
121
+ deriving Repr
122
+
99
123
structure Config where
100
- numRuns : Nat := 1000
124
+ warmupTime : Float := 3 .0
125
+ sampleTime : Float := 5 .0
126
+ numSamples : Nat := 100
127
+ samplingMode : SamplingMode := .flat
128
+ bootstrapSamples : Nat := 100000
101
129
noiseThreshold : Float := 0 .02
102
130
deriving Repr
103
131
@@ -113,20 +141,40 @@ def Float.variablePrecision (float : Float) (precision : Nat) : Float :=
113
141
-- Move the decimal right to the desired precision, round to the nearest integer, then move the decimal back
114
142
(float * moveDecimal).round / moveDecimal
115
143
116
- --#eval Float.variablePrecision 10.12345 4
117
-
144
+ -- Panics if float is a NaN
118
145
def Float.floatPretty (float : Float) (precision : Nat): String :=
119
146
let precise := float.variablePrecision precision
120
- precise.toString.dropRightWhile (· == '0' )
121
-
122
- --#eval Float.floatPretty 10.12345 4
123
-
124
- -- TODO: Pretty-print units based on nearest precision (nano, micro, etc)
125
- --def convertUnits
147
+ let parts := precise.toString.splitOn "."
148
+ let integer := parts[0 ]!
149
+ let fractional := parts[1 ]!.take precision
150
+ if !fractional.isEmpty
151
+ then integer ++ "." ++ fractional
152
+ else integer
153
+
154
+ -- Formats a time in ns as an xx.yy string with correct units
155
+ def Float.formatNanos (f : Float) : String :=
156
+ if f ≥ 10 ^ 9
157
+ then
158
+ (f / 10 ^ 9 ).floatPretty 2 ++ "s"
159
+ else if f ≥ 10 ^ 6
160
+ then
161
+ (f / 10 ^ 6 ).floatPretty 2 ++ "ms"
162
+ else if f ≥ 10 ^ 3
163
+ then
164
+ (f / 10 ^ 3 ).floatPretty 2 ++ "µs"
165
+ else
166
+ f.floatPretty 2 ++ "ns"
126
167
127
168
def BenchGroup.analyzeStats (bg : BenchGroup) (baseData : BenchData) (newData : BenchData) : IO BenchData := do
128
- let change := percentChange baseData.mean newData.mean
129
- IO.println s! "Percent change: { change.floatPretty 2 } %"
169
+ --IO.println s!"Data to compare: {baseData.mean} {newData.mean}"
170
+ let change ← if baseData.mean == 0
171
+ then
172
+ IO.println s! "Percent change is infinite, `baseData` mean is 0"
173
+ pure 0 .0
174
+ else
175
+ let change := percentChange baseData.mean newData.mean
176
+ IO.println s! "Percent change: { change.floatPretty 2 } %"
177
+ pure change
130
178
let nT := bg.config.noiseThreshold * 100
131
179
--IO.println s!"Noise threshold: {nT}%"
132
180
if change.abs > nT
@@ -181,27 +229,215 @@ structure Benchmarkable (α β : Type) where
181
229
182
230
def bench (name : String) (func : α → β) (arg : α) : Benchmarkable α β := ⟨ name, func, arg ⟩
183
231
184
- def BenchGroup.runBench (bg : BenchGroup) (bench : Benchmarkable α β) : IO Unit := do
185
- let mut cron := Cronos.new
186
- for run in List.range bg.config.numRuns do
187
- let benchName := s! "{ bench.name} run { run} "
188
- --IO.println benchName
189
- cron ← cron.clock benchName
232
+ def BenchGroup.warmup (bg : BenchGroup) (bench : Benchmarkable α β) : IO Float := do
233
+ IO.println s! "Warming up for { bg.config.warmupTime.floatPretty 2 } s"
234
+ let mut count := 0
235
+ let warmupNanos := Cronos.secToNano bg.config.warmupTime
236
+ let mut elapsed := 0
237
+ let startTime ← IO.monoNanosNow
238
+ while elapsed < warmupNanos do
190
239
let _res ← blackBoxIO bench.func bench.arg
191
- cron ← cron.clock benchName
192
- let results : BenchData := { mean := avgTime cron, stdDev := Float.ofNat 1 }
193
- IO.println s! "{ bench.name} avg: { results.mean} ns"
194
- bg.storeBench bench.name results
240
+ count := count + 1
241
+ let now ← IO.monoNanosNow
242
+ elapsed := now - startTime
243
+ let mean := Float.ofNat elapsed / Float.ofNat count
244
+ --IO.println s!"{bench.name} warmup avg: {mean}ns"
245
+ --IO.println s!"Ran {count} iterations in {bg.config.warmupTime.floatPretty 2}s\n"
246
+ return mean
247
+
248
+ -- TODO: Combine with other sampling functions, DRY
249
+ -- TODO: Recommend sample count if expectedTime >>> bg.config.sampleTime (i.e. itersPerSample == 1)
250
+ def BenchGroup.sampleFlat (bg : BenchGroup) (bench : Benchmarkable α β)
251
+ (warmupMean : Float) : IO (List Float) := do
252
+ let targetTime := bg.config.sampleTime.toNanos
253
+ let timePerSample := targetTime / (Float.ofNat bg.config.numSamples)
254
+ let itersPerSample := (timePerSample / warmupMean).ceil.toUInt64.toNat.max 1
255
+ let totalIters := itersPerSample * bg.config.numSamples
256
+ let expectedTime := warmupMean * Float.ofNat itersPerSample * bg.config.numSamples.toSeconds
257
+ if itersPerSample == 1
258
+ then
259
+ IO.eprintln s! "Warning: Unable to complete { bg.config.numSamples} samples in { bg.config.sampleTime.floatPretty 2 } s. You may wish to increase target time to { expectedTime.floatPretty 2 } s"
260
+ IO.println s! "Running { bg.config.numSamples} samples in { expectedTime.floatPretty 2 } s ({ totalIters} iterations)"
261
+ --IO.println s!"Flat sample. Iters per sample: {itersPerSample}"
262
+ let mut timings : List Float := []
263
+ for _s in List.range bg.config.numSamples do
264
+ --IO.println s!"Running sample {s}"
265
+ let mut sum := 0
266
+ for _ in List.range itersPerSample do
267
+ let start ← IO.monoNanosNow
268
+ let _res ← blackBoxIO bench.func bench.arg
269
+ let finish ← IO.monoNanosNow
270
+ sum := sum + (finish - start)
271
+ let mean := Float.ofNat sum / Float.ofNat itersPerSample
272
+ --IO.println s!"{bench.name} run {s} avg: {mean}ns"
273
+ timings := mean :: timings
274
+ return timings.reverse
275
+
276
+ -- Runs the samples as a sequence of linearly increasing iterations [d, 2d, 3d, ..., Nd]
277
+ -- where `N` is the total number of samples and `d` is a factor derived from the warmup iteration time. The sum of this series should be roughly equivalent to the total `sampleTime`
278
+ def BenchGroup.sampleLinear (bg : BenchGroup) (bench : Benchmarkable α β) (warmupMean : Float) : IO (List Float) := do
279
+ -- `d` has a minimum value of 1 if the `warmupMean` is sufficiently large
280
+ -- In this case,
281
+ let totalRuns := bg.config.numSamples * (bg.config.numSamples + 1 ) / 2
282
+ let targetTime := bg.config.sampleTime.toNanos
283
+ let d := (targetTime / warmupMean / (Float.ofNat totalRuns)).ceil.toUInt64.toNat.max 1
284
+ let expectedTime := (Float.ofNat totalRuns) * (Float.ofNat d) * warmupMean.toSeconds
285
+ let totalIters := (List.range bg.config.numSamples).map (fun x => (x + 1 ) * d)
286
+ if d == 1
287
+ then
288
+ IO.eprintln s! "Warning: Unable to complete { bg.config.numSamples} samples in { bg.config.sampleTime.floatPretty 2 } s. You may wish to increase target time to { expectedTime.floatPretty 2 } s"
289
+ IO.println s! "Running { bg.config.numSamples} samples in { expectedTime.floatPretty 2 } s ({ totalIters.sum.natPretty} iterations)"
290
+ --IO.println s!"Linear sample. Iters increase by a factor of {d} per sample"
291
+ let mut timings : List Float := []
292
+ for iters in totalIters do
293
+ --IO.println s!"Sample {s} with {iters} iterations"
294
+ let mut sum := 0
295
+ for _i in List.range iters do
296
+ let start ← IO.monoNanosNow
297
+ let _res ← blackBoxIO bench.func bench.arg
298
+ let finish ← IO.monoNanosNow
299
+ sum := sum + (finish - start)
300
+ let mean := Float.ofNat sum / Float.ofNat iters
301
+ --IO.println s!"Avg: {mean}ns"
302
+ timings := mean :: timings
303
+ return timings.reverse
304
+
305
+ def BenchGroup.sample (bg : BenchGroup) (bench : Benchmarkable α β) (warmupMean : Float) : IO (List Float) := do
306
+ match bg.config.samplingMode with
307
+ | .flat => bg.sampleFlat bench warmupMean
308
+ | .linear => bg.sampleLinear bench warmupMean
309
+
310
+ def percentile? (data : List Float) (p : Float): Option Float :=
311
+ if data.isEmpty || p > 100
312
+ then .none
313
+ else
314
+ let data := data.sortBy (fun x y => compareOfLessAndBEq x y)
315
+ let r := (p / 100 ) * (Float.ofNat data.length - 1 ) + 1
316
+ let rf := r - r.floor
317
+ if rf == 0
318
+ then
319
+ data[r.toUInt64.toNat - 1 ]?
320
+ else
321
+ let ri := r.floor.toUInt64.toNat
322
+ -- TODO: use fallible ? indices and monadically chain state
323
+ .some $ data[ri-1 ]! + (rf) * (data[ri]! - data[ri-1 ]!)
324
+
325
+ -- Outliers are classified following https://bheisler.github.io/criterion.rs/book/analysis.html#outlier-classification
326
+ structure Outliers where
327
+ outliers : List Float
328
+ highSevere : Nat
329
+ highMild : Nat
330
+ lowMild : Nat
331
+ lowSevere : Nat
332
+ deriving Repr
333
+
334
+ def Outliers.getTotal (o : Outliers) : Nat :=
335
+ o.highSevere + o.highMild + o.lowMild + o.lowSevere
336
+
337
+ -- TODO: Clean up and return the list for plotting
338
+ def tukey (data : List Float) : IO Unit := do
339
+ let upper := (percentile? data 75 ).get!
340
+ let lower := (percentile? data 25 ).get!
341
+ let iqr := upper - lower
342
+ let fences := [lower - 3 * iqr, lower - 1 .5 * iqr, upper + 1 .5 * iqr, upper + 3 * iqr]
343
+ let mut out : Outliers := ⟨ [], 0 , 0 , 0 , 0 ⟩
344
+ for elem in data do
345
+ if elem < fences[1 ]!
346
+ then
347
+ if elem < fences[0 ]! then
348
+ out := { out with outliers := elem :: out.outliers, lowSevere := out.lowSevere + 1 }
349
+ else
350
+ out := { out with outliers := elem :: out.outliers, lowMild := out.lowMild + 1 }
351
+ else if elem > fences[2 ]! then
352
+ if elem > fences[3 ]! then
353
+ out := { out with outliers := elem :: out.outliers, highSevere := out.highSevere + 1 }
354
+ else
355
+ out := { out with outliers := elem :: out.outliers, highMild := out.highMild + 1 }
356
+ let outLength := out.outliers.length
357
+ if outLength > 0 then
358
+ let samples := data.length
359
+ IO.println s! "Found { outLength} outliers among { samples} measurements"
360
+ if out.lowMild > 0 then
361
+ let pct := Float.ofNat out.lowMild / (Float.ofNat samples) * 100
362
+ IO.println s! " { out.lowMild} ({ pct.floatPretty 2 } %) low mild"
363
+ if out.lowSevere > 0 then
364
+ let pct := Float.ofNat out.lowSevere / (Float.ofNat samples) * 100
365
+ IO.println s! " { out.lowSevere} ({ pct.floatPretty 2 } %) low severe"
366
+ if out.highMild > 0 then
367
+ let pct := Float.ofNat out.highMild / (Float.ofNat samples) * 100
368
+ IO.println s! " { out.highMild} ({ pct.floatPretty 2 } %) high mild"
369
+ if out.highSevere > 0 then
370
+ let pct := Float.ofNat out.highSevere / (Float.ofNat samples) * 100
371
+ IO.println s! " { out.highSevere} ({ pct.floatPretty 2 } %) high severe"
372
+
373
+ --#eval tukey [ 1,2,3,4,9999999999 ]
374
+
375
+ structure RunningStats where
376
+ count : Nat
377
+ mean : Float
378
+ m2 : Float
379
+
380
+ -- Reference impl https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
381
+ def RunningStats.update (stt: RunningStats) (newValue : Float) : RunningStats :=
382
+ let count := stt.count + 1
383
+ let delta := newValue - stt.mean
384
+ let mean := stt.mean + delta / Float.ofNat count
385
+ let delta' := newValue - mean
386
+ let m2 := stt.m2 + delta * delta'
387
+ { count, mean, m2 }
388
+
389
+ def RunningStats.finalize (stt: RunningStats) : BenchData :=
390
+ if stt.count < 2
391
+ then { mean := 0 , stdDev := 0 }
392
+ else
393
+ { mean := stt.mean, stdDev := (stt.m2 / (Float.ofNat stt.count - 1 )).sqrt }
394
+
395
+ -- TODO: Rewrite with monads as an exercise
396
+ def pickRandom (xs : List Float) (gen : StdGen) : (Float × StdGen) :=
397
+ let (res, gen') := randNat gen 0 (xs.length - 1 )
398
+ (xs[res]!, gen')
399
+
400
+ def resample (xs : List Float) (gen : StdGen) (sampleSize : Nat) : (List Float × StdGen) :=
401
+ go xs gen sampleSize []
402
+ where
403
+ go xs gen n ys :=
404
+ match n with
405
+ | 0 => (ys.reverse, gen)
406
+ | n + 1 =>
407
+ let (res, gen') := pickRandom xs gen
408
+ go xs gen' n (res :: ys)
409
+
410
+ -- TODO: Rewrite as pure function with state monad
411
+ def bootstrap (xs : List Float) (gen : StdGen) (numSamples : Nat) : IO BenchData := do
412
+ if numSamples < 2
413
+ then
414
+ IO.eprintln "Error: need at least 2 samples to perform analysis"
415
+ let mut gen' := gen
416
+ let mut stt : RunningStats := { count := 0 , mean := 0 , m2 := 0 }
417
+ for _s in List.range numSamples do
418
+ let (res, gen'') := resample xs gen' xs.length
419
+ --IO.println res
420
+ let mean := res.sum / Float.ofNat res.length
421
+ stt := stt.update mean
422
+ gen' := gen''
423
+ return stt.finalize
424
+
425
+ --#eval bootstrap ((List.range 100).map (Float.ofNat)) mkStdGen 10000
195
426
196
- -- TODO: Add warm-up time to prevent slow first runs
197
427
-- TODO: Make sure compiler isn't caching partial evaluation result for future runs of the same function (measure first vs subsequent runs)
198
428
-- A benchmark group defines a collection of related benchmarks that share a configuration, such as number of runs and noise threshold
199
429
def bgroup {α β : Type } (name: String) (benches : List (Benchmarkable α β)) (config : Config := {}) : IO Unit := do
200
- let benchGroup : BenchGroup := { name, config }
430
+ let bg : BenchGroup := { name, config }
201
431
--IO.println s!"My config: {repr config}"
202
432
IO.println s! "Running bench group { name} \n "
203
433
for b in benches do
204
- let _results ← benchGroup.runBench b
434
+ let warmupMean ← bg.warmup b
435
+ IO.println s! "Running { b.name} "
436
+ let timings ← bg.sample b warmupMean
437
+ tukey timings
438
+ -- TODO: Add proper randomness with OsRng
439
+ let bd ← bootstrap timings mkStdGen bg.config.bootstrapSamples
440
+ IO.println s! "Mean: { bd.mean.formatNanos} "
441
+ IO.println s! "Standard deviation: { bd.stdDev} "
442
+ bg.storeBench b.name bd
205
443
IO.println ""
206
-
207
- --#eval Float.ofBits (Float.ofNat 1000).toBits
0 commit comments