Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Constant stack size #2688

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 60 additions & 25 deletions src/BenchmarkDotNet/Engines/Engine.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Characteristics;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Mathematics;
using BenchmarkDotNet.Portability;
using BenchmarkDotNet.Reports;
using JetBrains.Annotations;
Expand Down Expand Up @@ -102,6 +105,39 @@ public void Dispose()
}
}

/// <summary>
/// Lazily yields the stages to execute, in order, together with the evaluator that decides
/// when each stage stops. Evaluators are created only when the consumer advances to the
/// corresponding stage. <c>Host.BeforeMainRun()</c> is invoked when the consumer advances to
/// the actual workload stage, and <c>Host.AfterMainRun()</c> when it advances past that stage.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private IEnumerable<(IterationStage stage, IterationMode mode, IEngineStageEvaluator evaluator)> EnumerateStages()
{
    bool hasWarmup = Strategy != RunStrategy.ColdStart;

    // Pilot and overhead stages are emitted for every strategy except ColdStart and Monitoring.
    if (hasWarmup && Strategy != RunStrategy.Monitoring)
    {
        // The pilot stage yields no evaluator (null) when it has to be skipped.
        if (pilotStage.GetEvaluator() is { } pilotEvaluator)
        {
            yield return (IterationStage.Pilot, IterationMode.Workload, pilotEvaluator);
        }

        if (EvaluateOverhead)
        {
            var overheadWarmup = warmupStage.GetOverheadEvaluator();
            yield return (IterationStage.Warmup, IterationMode.Overhead, overheadWarmup);

            var overheadActual = actualStage.GetOverheadEvaluator();
            yield return (IterationStage.Actual, IterationMode.Overhead, overheadActual);
        }
    }

    if (hasWarmup)
    {
        var workloadWarmup = warmupStage.GetWorkloadEvaluator(Strategy);
        yield return (IterationStage.Warmup, IterationMode.Workload, workloadWarmup);
    }

    Host.BeforeMainRun();

    // Monitoring forces the "specific" (fixed iteration count) evaluator for the main run.
    bool forceSpecific = Strategy == RunStrategy.Monitoring;
    yield return (IterationStage.Actual, IterationMode.Workload, actualStage.GetWorkloadEvaluator(forceSpecific));

    Host.AfterMainRun();
}

// AggressiveOptimization forces the method to go straight to tier1 JIT, and the method is never re-jitted afterwards,
// which eliminates tiered JIT as a potential variable in measurements.
[MethodImpl(CodeGenHelper.AggressiveOptimizationOption)]
public RunResults Run()
{
var measurements = new List<Measurement>();
Expand All @@ -112,30 +148,24 @@ public RunResults Run()
if (EngineEventSource.Log.IsEnabled())
EngineEventSource.Log.BenchmarkStart(BenchmarkName);

if (Strategy != RunStrategy.ColdStart)
{
if (Strategy != RunStrategy.Monitoring)
{
var pilotStageResult = pilotStage.Run();
invokeCount = pilotStageResult.PerfectInvocationCount;
measurements.AddRange(pilotStageResult.Measurements);

if (EvaluateOverhead)
{
measurements.AddRange(warmupStage.RunOverhead(invokeCount, UnrollFactor));
measurements.AddRange(actualStage.RunOverhead(invokeCount, UnrollFactor));
}
}

measurements.AddRange(warmupStage.RunWorkload(invokeCount, UnrollFactor, Strategy));
// Enumerate the stages and run iterations in a loop to ensure each benchmark invocation is called with a constant stack size.
// #1120
foreach (var (stage, mode, evaluator) in EnumerateStages())
{
var stageMeasurements = new List<Measurement>(evaluator.MaxIterationCount);
int iterationCounter = 0;
while (!evaluator.EvaluateShouldStop(stageMeasurements, ref invokeCount))
{
// TODO: Not sure why index is 1-based? 0-based is standard.
++iterationCounter;
var measurement = RunIteration(new IterationData(mode, stage, iterationCounter, invokeCount, UnrollFactor));
stageMeasurements.Add(measurement);
}
measurements.AddRange(stageMeasurements);

WriteLine();
}

Host.BeforeMainRun();

measurements.AddRange(actualStage.RunWorkload(invokeCount, UnrollFactor, forceSpecific: Strategy == RunStrategy.Monitoring));

Host.AfterMainRun();

(GcStats workGcHasDone, ThreadingStats threadingStats, double exceptionFrequency) = includeExtraStats
? GetExtraStats(new IterationData(IterationMode.Workload, IterationStage.Actual, 0, invokeCount, UnrollFactor))
: (GcStats.Empty, ThreadingStats.Empty, 0);
Expand All @@ -148,11 +178,15 @@ public RunResults Run()
return new RunResults(measurements, outlierMode, workGcHasDone, threadingStats, exceptionFrequency);
}

[MethodImpl(CodeGenHelper.AggressiveOptimizationOption)]
public Measurement RunIteration(IterationData data)
{
// Initialization
long invokeCount = data.InvokeCount;
int unrollFactor = data.UnrollFactor;
if (invokeCount % unrollFactor != 0)
throw new ArgumentOutOfRangeException(nameof(data), $"InvokeCount({invokeCount}) should be a multiple of UnrollFactor({unrollFactor}).");

long totalOperations = invokeCount * OperationsPerInvoke;
bool isOverhead = data.IterationMode == IterationMode.Overhead;
bool randomizeMemory = !isOverhead && MemoryRandomization;
Expand All @@ -167,7 +201,7 @@ public Measurement RunIteration(IterationData data)
EngineEventSource.Log.IterationStart(data.IterationMode, data.IterationStage, totalOperations);

var clockSpan = randomizeMemory
? MeasureWithRandomMemory(action, invokeCount / unrollFactor)
? MeasureWithRandomStack(action, invokeCount / unrollFactor)
: Measure(action, invokeCount / unrollFactor);

if (EngineEventSource.Log.IsEnabled())
Expand All @@ -193,8 +227,8 @@ public Measurement RunIteration(IterationData data)
// This is in a separate method, because stackalloc can affect code alignment,
// resulting in unexpected measurements on some AMD cpus,
// even if the stackalloc branch isn't executed. (#2366)
[MethodImpl(MethodImplOptions.NoInlining)]
private unsafe ClockSpan MeasureWithRandomMemory(Action<long> action, long invokeCount)
[MethodImpl(MethodImplOptions.NoInlining | CodeGenHelper.AggressiveOptimizationOption)]
private unsafe ClockSpan MeasureWithRandomStack(Action<long> action, long invokeCount)
{
byte* stackMemory = stackalloc byte[random.Next(32)];
var clockSpan = Measure(action, invokeCount);
Expand All @@ -205,6 +239,7 @@ private unsafe ClockSpan MeasureWithRandomMemory(Action<long> action, long invok
// Intentionally empty, non-inlined method that accepts a raw byte pointer and does nothing with it.
// NOTE(review): presumably called with the stackalloc'd buffer so the allocation counts as "used" —
// the call site is not visible in this view; confirm.
[MethodImpl(MethodImplOptions.NoInlining)]
private unsafe void Consume(byte* _) { }

[MethodImpl(MethodImplOptions.NoInlining | CodeGenHelper.AggressiveOptimizationOption)]
private ClockSpan Measure(Action<long> action, long invokeCount)
{
var clock = Clock.Start();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,63 @@ private List<Measurement> RunSpecific(long invokeCount, IterationMode iterationM

return measurements;
}

/// <summary>
/// Creates the evaluator for the overhead iterations of the actual stage.
/// </summary>
/// <returns>A new auto evaluator configured for overhead measurements.</returns>
internal IEngineStageEvaluator GetOverheadEvaluator()
{
    return new AutoEvaluator(this, isOverhead: true);
}

/// <summary>
/// Creates the evaluator for the workload iterations of the actual stage.
/// </summary>
/// <param name="forceSpecific">When true, the specific evaluator is used even if no iteration count is set.</param>
/// <returns>
/// A specific evaluator when <c>iterationCount</c> is set or <paramref name="forceSpecific"/> is true;
/// otherwise an auto evaluator for workload measurements.
/// </returns>
internal IEngineStageEvaluator GetWorkloadEvaluator(bool forceSpecific)
{
    if (forceSpecific || iterationCount != null)
    {
        return new SpecificEvaluator(this);
    }

    return new AutoEvaluator(this, isOverhead: false);
}

/// <summary>
/// Stops the actual stage once the confidence-interval margin of the collected measurements
/// drops below the allowed error (after the minimum iteration count), or once an iteration cap is hit.
/// </summary>
private sealed class AutoEvaluator(EngineActualStage stage, bool isOverhead) : IEngineStageEvaluator
{
    // Private copy of the measurements seen so far; statistics are calculated over this list.
    private readonly List<Measurement> _statisticsSample = new List<Measurement>(stage.maxIterationCount);

    // Number of evaluations performed with at least one measurement available.
    private int _evaluatedIterations;

    public int MaxIterationCount => stage.maxIterationCount;

    /// <summary>
    /// Records the latest measurement and decides whether the stage is done.
    /// </summary>
    /// <param name="measurements">Measurements of the current stage; only the last element is read.</param>
    /// <param name="invokeCount">Not modified by this evaluator.</param>
    /// <returns>True when the stage should stop; false to run another iteration.</returns>
    public bool EvaluateShouldStop(List<Measurement> measurements, ref long invokeCount)
    {
        // Nothing measured yet: always run the first iteration.
        if (measurements.Count == 0)
        {
            return false;
        }

        double relativeErrorLimit = isOverhead ? MaxOverheadRelativeError : stage.maxRelativeError;

        _evaluatedIterations++;
        _statisticsSample.Add(measurements[measurements.Count - 1]);

        var statistics = MeasurementsStatistics.Calculate(_statisticsSample, stage.outlierMode);
        double actualError = statistics.LegacyConfidenceInterval.Margin;

        // The allowed error is the stricter of the relative and the (optional) absolute limit.
        double relativeBound = relativeErrorLimit * statistics.Mean;
        double absoluteBound = stage.maxAbsoluteError?.Nanoseconds ?? double.MaxValue;
        double allowedError = Math.Min(relativeBound, absoluteBound);

        bool preciseEnough = _evaluatedIterations >= stage.minIterationCount && actualError < allowedError;
        bool capReached = _evaluatedIterations >= stage.maxIterationCount
            || (isOverhead && _evaluatedIterations >= MaxOverheadIterationCount);

        return preciseEnough || capReached;
    }
}

/// <summary>
/// Stops the actual stage after a fixed number of iterations:
/// <c>stage.iterationCount</c> when set, otherwise <c>DefaultWorkloadCount</c>.
/// </summary>
private sealed class SpecificEvaluator(EngineActualStage stage) : IEngineStageEvaluator
{
    // Number of times EvaluateShouldStop has been called.
    private int _evaluationCalls;

    public int MaxIterationCount => stage.iterationCount ?? DefaultWorkloadCount;

    /// <summary>
    /// Counts the call and reports whether the fixed iteration budget is exhausted.
    /// </summary>
    /// <param name="measurements">Not read by this evaluator.</param>
    /// <param name="invokeCount">Not modified by this evaluator.</param>
    /// <returns>True once the number of calls exceeds <see cref="MaxIterationCount"/>.</returns>
    public bool EvaluateShouldStop(List<Measurement> measurements, ref long invokeCount)
    {
        _evaluationCalls++;
        return _evaluationCalls > MaxIterationCount;
    }
}
}
}
82 changes: 82 additions & 0 deletions src/BenchmarkDotNet/Engines/EnginePilotStage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -133,5 +133,87 @@ private PilotStageResult RunSpecific()
}

/// <summary>
/// Rounds <paramref name="count"/> up to a multiple of <c>unrollFactor</c>
/// (for positive counts and a positive unroll factor).
/// </summary>
private long Autocorrect(long count)
{
    long unrolledChunks = (count + unrollFactor - 1) / unrollFactor;
    return unrolledChunks * unrollFactor;
}

/// <summary>
/// Selects the evaluator that drives the pilot stage.
/// </summary>
/// <returns>
/// Null when the job specifies an invocation count (the pilot stage is skipped);
/// a specific evaluator when the job specifies an iteration time; otherwise an auto evaluator.
/// </returns>
internal IEngineStageEvaluator GetEvaluator()
{
    // If InvocationCount is specified, pilot stage should be skipped
    if (TargetJob.HasValue(RunMode.InvocationCountCharacteristic))
    {
        return null;
    }

    // Here we want to guess "perfect" amount of invocation
    if (TargetJob.HasValue(RunMode.IterationTimeCharacteristic))
    {
        return new SpecificEvaluator(this);
    }

    return new AutoEvaluator(this);
}

/// <summary>
/// Grows the invocation count until the per-operation error attributed to the chronometer
/// resolution is below the allowed error and the iteration lasts at least the minimum
/// iteration time, or until <c>MaxInvokeCount</c> is reached.
/// </summary>
private sealed class AutoEvaluator(EnginePilotStage stage) : IEngineStageEvaluator
{
    public int MaxIterationCount => 0;

    /// <summary>
    /// Inspects the latest pilot measurement and either stops or picks the next invocation count.
    /// </summary>
    /// <param name="measurements">Measurements of the current stage; only the last element is read.</param>
    /// <param name="invokeCount">Initialized on the first call, then increased while the stage continues.</param>
    /// <returns>True when the stage should stop; false to run another iteration.</returns>
    public bool EvaluateShouldStop(List<Measurement> measurements, ref long invokeCount)
    {
        // First call: seed the invocation count before anything has been measured.
        if (measurements.Count == 0)
        {
            invokeCount = stage.Autocorrect(stage.minInvokeCount);
            return false;
        }

        double iterationTime = measurements[measurements.Count - 1].Nanoseconds;

        // An operation error which has arisen due to the Chronometer precision
        double operationError = 2.0 * stage.resolution / invokeCount;

        // Max acceptable operation error: the stricter of the relative and the (optional) absolute limit.
        double relativeBound = iterationTime / invokeCount * stage.maxRelativeError;
        double absoluteBound = stage.maxAbsoluteError?.Nanoseconds ?? double.MaxValue;
        double operationMaxError = Math.Min(relativeBound, absoluteBound);

        bool isFinished = operationError < operationMaxError && iterationTime >= stage.minIterationTime.Nanoseconds;
        if (isFinished || invokeCount >= MaxInvokeCount)
        {
            return true;
        }

        // Step by one while unrolling is disabled and the count is still small; double otherwise.
        bool stepByOne = stage.unrollFactor == 1 && invokeCount < EnvironmentResolver.DefaultUnrollFactorForThroughput;
        invokeCount = stepByOne ? invokeCount + 1 : invokeCount * 2;

        return false;
    }
}

/// <summary>
/// Adjusts the invocation count so that an iteration approaches <c>stage.targetIterationTime</c>.
/// Stops when the proposed count differs from the current one by at most 1, or after the
/// proposed count has been lower than the current one three times.
/// </summary>
private sealed class SpecificEvaluator(EnginePilotStage stage) : IEngineStageEvaluator
{
    // Amount of iterations where the proposed invoke count was lower than the current one.
    private int _decreaseCount;

    public int MaxIterationCount => 0;

    /// <summary>
    /// Inspects the latest pilot measurement and either stops or picks the next invocation count.
    /// </summary>
    /// <param name="measurements">Measurements of the current stage; only the last element is read.</param>
    /// <param name="invokeCount">Initialized on the first call, then replaced by the proposed count while the stage continues.</param>
    /// <returns>True when the stage should stop; false to run another iteration.</returns>
    public bool EvaluateShouldStop(List<Measurement> measurements, ref long invokeCount)
    {
        // First call: seed the invocation count before anything has been measured.
        if (measurements.Count == 0)
        {
            invokeCount = stage.Autocorrect(Engine.MinInvokeCount);
            return false;
        }

        double actualIterationTime = measurements[measurements.Count - 1].Nanoseconds;

        // Scale the current count by the ratio of target to actual iteration time.
        long scaledInvokeCount = (long) Math.Round(invokeCount * stage.targetIterationTime / actualIterationTime);
        long newInvokeCount = stage.Autocorrect(Math.Max(stage.minInvokeCount, scaledInvokeCount));

        if (newInvokeCount < invokeCount)
        {
            _decreaseCount++;
        }

        bool converged = Math.Abs(newInvokeCount - invokeCount) <= 1;
        if (converged || _decreaseCount >= 3)
        {
            return true;
        }

        invokeCount = newInvokeCount;
        return false;
    }
}
}
}
14 changes: 14 additions & 0 deletions src/BenchmarkDotNet/Engines/EngineWarmupStage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,19 @@ internal IReadOnlyList<Measurement> Run(long invokeCount, IterationMode iteratio
var criteria = DefaultStoppingCriteriaFactory.Instance.CreateWarmup(engine.TargetJob, engine.Resolver, iterationMode, runStrategy);
return Run(criteria, invokeCount, iterationMode, IterationStage.Warmup, unrollFactor);
}

/// <summary>
/// Creates the evaluator for overhead warmup iterations, using warmup stopping criteria
/// built for <c>IterationMode.Overhead</c> and <c>RunStrategy.Throughput</c>.
/// </summary>
internal IEngineStageEvaluator GetOverheadEvaluator()
{
    var overheadCriteria = DefaultStoppingCriteriaFactory.Instance.CreateWarmup(
        engine.TargetJob, engine.Resolver, IterationMode.Overhead, RunStrategy.Throughput);
    return new Evaluator(overheadCriteria);
}

/// <summary>
/// Creates the evaluator for workload warmup iterations, using warmup stopping criteria
/// built for <c>IterationMode.Workload</c> and the given run strategy.
/// </summary>
/// <param name="runStrategy">Run strategy passed to the stopping-criteria factory.</param>
internal IEngineStageEvaluator GetWorkloadEvaluator(RunStrategy runStrategy)
{
    var workloadCriteria = DefaultStoppingCriteriaFactory.Instance.CreateWarmup(
        engine.TargetJob, engine.Resolver, IterationMode.Workload, runStrategy);
    return new Evaluator(workloadCriteria);
}

/// <summary>
/// Adapts an <c>IStoppingCriteria</c> to <c>IEngineStageEvaluator</c>.
/// </summary>
private sealed class Evaluator(IStoppingCriteria stoppingCriteria) : IEngineStageEvaluator
{
    public int MaxIterationCount
    {
        get { return stoppingCriteria.MaxIterationCount; }
    }

    /// <summary>
    /// Delegates the decision to the wrapped stopping criteria.
    /// </summary>
    /// <param name="measurements">Measurements of the current stage, passed to the criteria.</param>
    /// <param name="invokeCount">Not modified by this evaluator.</param>
    /// <returns>The <c>IsFinished</c> flag of the criteria's evaluation result.</returns>
    public bool EvaluateShouldStop(List<Measurement> measurements, ref long invokeCount)
    {
        var verdict = stoppingCriteria.Evaluate(measurements);
        return verdict.IsFinished;
    }
}
}
}
11 changes: 11 additions & 0 deletions src/BenchmarkDotNet/Engines/IEngineStageEvaluator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
using System.Collections.Generic;
using BenchmarkDotNet.Reports;

namespace BenchmarkDotNet.Engines
{
/// <summary>
/// Decides, between iterations, whether an engine stage should keep running.
/// </summary>
internal interface IEngineStageEvaluator
{
    /// <summary>
    /// Iteration count hint for the stage. Implementations may return 0.
    /// </summary>
    int MaxIterationCount { get; }

    /// <summary>
    /// Evaluates the measurements collected so far for the stage.
    /// </summary>
    /// <param name="measurements">Measurements of the current stage; empty before the first iteration.</param>
    /// <param name="invokeCount">Invocation count for the next iteration; an implementation may update it.</param>
    /// <returns>True when the stage should stop; false to run another iteration.</returns>
    bool EvaluateShouldStop(List<Measurement> measurements, ref long invokeCount);
}
}