Skip to content

Commit 78dfea8

Browse files
committed
pot
1 parent 4c0e26a commit 78dfea8

File tree

5 files changed

+41
-55
lines changed

5 files changed

+41
-55
lines changed

BitFaster.Caching.Benchmarks/Lfu/SketchFrequency.cs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,15 @@
77

88
namespace BitFaster.Caching.Benchmarks.Lfu
99
{
10+
#if Windows
11+
[DisassemblyDiagnoser(printSource: true, maxDepth: 4)]
12+
#endif
1013
[SimpleJob(RuntimeMoniker.Net60)]
14+
[SimpleJob(RuntimeMoniker.Net80)]
15+
[SimpleJob(RuntimeMoniker.Net90)]
1116
[MemoryDiagnoser(displayGenColumns: false)]
1217
[HideColumns("Job", "Median", "RatioSD", "Alloc Ratio")]
13-
[ColumnChart(Title ="Sketch Frequency ({JOB})")]
18+
[ColumnChart(Title ="Sketch Frequency ({JOB})", Colors = "#cd5c5c,#fa8072,#ffa07a")]
1419
public class SketchFrequency
1520
{
1621
const int sketchSize = 1_048_576;
@@ -22,7 +27,7 @@ public class SketchFrequency
2227
private CmSketchCore<int, DisableHardwareIntrinsics> blockStd;
2328
private CmSketchCore<int, DetectIsa> blockAvx;
2429

25-
[Params(32_768, 524_288, 8_388_608, 134_217_728)]
30+
[Params(512, 1024, 32_768, 524_288, 8_388_608, 134_217_728)]
2631
public int Size { get; set; }
2732

2833
[GlobalSetup]
@@ -45,7 +50,7 @@ public int FrequencyFlat()
4550
return count;
4651
}
4752

48-
[Benchmark(OperationsPerInvoke = iterations)]
53+
//[Benchmark(OperationsPerInvoke = iterations)]
4954
public int FrequencyFlatAvx()
5055
{
5156
int count = 0;

BitFaster.Caching.Benchmarks/Lfu/SketchIncrement.cs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,21 @@
11

22
using System.Collections.Generic;
3+
using Benchly;
34
using BenchmarkDotNet.Attributes;
45
using BenchmarkDotNet.Jobs;
56
using BitFaster.Caching.Lfu;
67

78
namespace BitFaster.Caching.Benchmarks.Lfu
89
{
10+
#if Windows
11+
[DisassemblyDiagnoser(printSource: true, maxDepth: 4)]
12+
#endif
913
[SimpleJob(RuntimeMoniker.Net60)]
14+
[SimpleJob(RuntimeMoniker.Net80)]
15+
[SimpleJob(RuntimeMoniker.Net90)]
1016
[MemoryDiagnoser(displayGenColumns: false)]
1117
[HideColumns("Job", "Median", "RatioSD", "Alloc Ratio")]
18+
[ColumnChart(Title = "Sketch Increment ({JOB})", Colors = "#cd5c5c,#fa8072,#ffa07a")]
1219
public class SketchIncrement
1320
{
1421
const int iterations = 1_048_576;
@@ -19,7 +26,7 @@ public class SketchIncrement
1926
private CmSketchCore<int, DisableHardwareIntrinsics> blockStd;
2027
private CmSketchCore<int, DetectIsa> blockAvx;
2128

22-
[Params(32_768, 524_288, 8_388_608, 134_217_728)]
29+
[Params(512, 1024, 32_768, 524_288, 8_388_608, 134_217_728)]
2330
public int Size { get; set; }
2431

2532
[GlobalSetup]
@@ -41,7 +48,7 @@ public void IncFlat()
4148
}
4249
}
4350

44-
[Benchmark(OperationsPerInvoke = iterations)]
51+
//[Benchmark(OperationsPerInvoke = iterations)]
4552
public void IncFlatAvx()
4653
{
4754
for (int i = 0; i < iterations; i++)

BitFaster.Caching.ThroughputAnalysis/BitFaster.Caching.ThroughputAnalysis.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
<PropertyGroup>
44
<OutputType>Exe</OutputType>
5-
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
5+
<TargetFrameworks>net6.0;net8.0;net9.0</TargetFrameworks>
66
<SignAssembly>False</SignAssembly>
77
<Version>2.0.0</Version>
88
<ServerGarbageCollection>true</ServerGarbageCollection>

BitFaster.Caching.UnitTests/BitFaster.Caching.UnitTests.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
4-
<TargetFrameworks>net48;netcoreapp3.1;net6.0</TargetFrameworks>
4+
<TargetFrameworks>net48;netcoreapp3.1;net6.0;net8.0</TargetFrameworks>
55
<LangVersion>9.0</LangVersion>
66
</PropertyGroup>
77

BitFaster.Caching/Lfu/CmSketchCore.cs

Lines changed: 22 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Diagnostics.CodeAnalysis;
4+
using System.Runtime.CompilerServices;
5+
46

57

68
#if !NETSTANDARD2_0
@@ -172,6 +174,7 @@ private unsafe void IncrementStd(T value)
172174
}
173175

174176
// Applies another round of hashing for additional randomization
177+
//[MethodImpl(MethodImplOptions.AggressiveInlining)]
175178
private static int Rehash(int x)
176179
{
177180
x = (int)(x * 0x31848bab);
@@ -180,6 +183,7 @@ private static int Rehash(int x)
180183
}
181184

182185
// Applies a supplemental hash function to defend against poor quality hashes.
186+
//[MethodImpl(MethodImplOptions.AggressiveInlining)]
183187
private static int Spread(int x)
184188
{
185189
x ^= (int)((uint)x >> 17);
@@ -231,40 +235,28 @@ private void Reset()
231235
}
232236

233237
#if !NETSTANDARD2_0
238+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
239+
//[MethodImpl((MethodImplOptions)512)]
234240
private unsafe int EstimateFrequencyAvx(T value)
235241
{
236242
int blockHash = Spread(comparer.GetHashCode(value));
237243
int counterHash = Rehash(blockHash);
238244
int block = (blockHash & blockMask) << 3;
239245

240-
Vector128<int> h = Vector128.Create(counterHash);
241-
h = Avx2.ShiftRightLogicalVariable(h.AsUInt32(), Vector128.Create(0U, 8U, 16U, 24U)).AsInt32();
246+
Vector128<int> h = Avx2.ShiftRightLogicalVariable(Vector128.Create(counterHash).AsUInt32(), Vector128.Create(0U, 8U, 16U, 24U)).AsInt32();
247+
Vector128<int> index = Avx2.ShiftLeftLogical(Avx2.And(Avx2.ShiftRightLogical(h, 1), Vector128.Create(15)), 2);
248+
Vector128<int> blockOffset = Avx2.Add(Avx2.Add(Vector128.Create(block), Avx2.And(h, Vector128.Create(1))), Vector128.Create(0, 2, 4, 6));
242249

243-
var index = Avx2.ShiftRightLogical(h, 1);
244-
index = Avx2.And(index, Vector128.Create(15)); // j - counter index
245-
Vector128<int> offset = Avx2.And(h, Vector128.Create(1));
246-
Vector128<int> blockOffset = Avx2.Add(Vector128.Create(block), offset); // i - table index
247-
blockOffset = Avx2.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)
250+
Vector256<ulong> indexLong = Avx2.PermuteVar8x32(Vector256.Create(index, Vector128<int>.Zero), Vector256.Create(0, 4, 1, 5, 2, 5, 3, 7)).AsUInt64();
248251

249252
fixed (long* tablePtr = table)
250253
{
251-
Vector256<long> tableVector = Avx2.GatherVector256(tablePtr, blockOffset, 8);
252-
index = Avx2.ShiftLeftLogical(index, 2);
253-
254-
// convert index from int to long via permute
255-
Vector256<long> indexLong = Vector256.Create(index, Vector128<int>.Zero).AsInt64();
256-
Vector256<int> permuteMask2 = Vector256.Create(0, 4, 1, 5, 2, 5, 3, 7);
257-
indexLong = Avx2.PermuteVar8x32(indexLong.AsInt32(), permuteMask2).AsInt64();
258-
tableVector = Avx2.ShiftRightLogicalVariable(tableVector, indexLong.AsUInt64());
259-
tableVector = Avx2.And(tableVector, Vector256.Create(0xfL));
260-
261-
Vector256<int> permuteMask = Vector256.Create(0, 2, 4, 6, 1, 3, 5, 7);
262-
Vector128<ushort> count = Avx2.PermuteVar8x32(tableVector.AsInt32(), permuteMask)
254+
Vector128<ushort> count = Avx2.PermuteVar8x32(Avx2.And(Avx2.ShiftRightLogicalVariable(Avx2.GatherVector256(tablePtr, blockOffset, 8), indexLong), Vector256.Create(0xfL)).AsInt32(), Vector256.Create(0, 2, 4, 6, 1, 3, 5, 7))
263255
.GetLower()
264256
.AsUInt16();
265257

266258
// set the zeroed high parts of the long value to ushort.MaxValue
267-
#if NET6_0
259+
#if NET6_0_OR_GREATER
268260
count = Avx2.Blend(count, Vector128<ushort>.AllBitsSet, 0b10101010);
269261
#else
270262
count = Avx2.Blend(count, Vector128.Create(ushort.MaxValue), 0b10101010);
@@ -274,48 +266,30 @@ private unsafe int EstimateFrequencyAvx(T value)
274266
}
275267
}
276268

269+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
270+
//[MethodImpl((MethodImplOptions)512)]
277271
private unsafe void IncrementAvx(T value)
278272
{
279273
int blockHash = Spread(comparer.GetHashCode(value));
280274
int counterHash = Rehash(blockHash);
281275
int block = (blockHash & blockMask) << 3;
282276

283-
Vector128<int> h = Vector128.Create(counterHash);
284-
h = Avx2.ShiftRightLogicalVariable(h.AsUInt32(), Vector128.Create(0U, 8U, 16U, 24U)).AsInt32();
277+
Vector128<int> h = Avx2.ShiftRightLogicalVariable(Vector128.Create(counterHash).AsUInt32(), Vector128.Create(0U, 8U, 16U, 24U)).AsInt32();
278+
Vector128<int> index = Avx2.ShiftLeftLogical(Avx2.And(Avx2.ShiftRightLogical(h, 1), Vector128.Create(15)), 2);
279+
Vector128<int> blockOffset = Avx2.Add(Avx2.Add(Vector128.Create(block), Avx2.And(h, Vector128.Create(1))), Vector128.Create(0, 2, 4, 6));
285280

286-
Vector128<int> index = Avx2.ShiftRightLogical(h, 1);
287-
index = Avx2.And(index, Vector128.Create(15)); // j - counter index
288-
Vector128<int> offset = Avx2.And(h, Vector128.Create(1));
289-
Vector128<int> blockOffset = Avx2.Add(Vector128.Create(block), offset); // i - table index
290-
blockOffset = Avx2.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)
281+
Vector256<ulong> offsetLong = Avx2.PermuteVar8x32(Vector256.Create(index, Vector128<int>.Zero), Vector256.Create(0, 4, 1, 5, 2, 5, 3, 7)).AsUInt64();
282+
Vector256<long> mask = Avx2.ShiftLeftLogicalVariable(Vector256.Create(0xfL), offsetLong);
291283

292284
fixed (long* tablePtr = table)
293285
{
294-
Vector256<long> tableVector = Avx2.GatherVector256(tablePtr, blockOffset, 8);
295-
296-
// j == index
297-
index = Avx2.ShiftLeftLogical(index, 2);
298-
Vector256<long> offsetLong = Vector256.Create(index, Vector128<int>.Zero).AsInt64();
299-
300-
Vector256<int> permuteMask = Vector256.Create(0, 4, 1, 5, 2, 5, 3, 7);
301-
offsetLong = Avx2.PermuteVar8x32(offsetLong.AsInt32(), permuteMask).AsInt64();
302-
303-
// mask = (0xfL << offset)
304-
Vector256<long> fifteen = Vector256.Create(0xfL);
305-
Vector256<long> mask = Avx2.ShiftLeftLogicalVariable(fifteen, offsetLong.AsUInt64());
306-
307-
// (table[i] & mask) != mask)
308286
// Note masked is 'equal' - therefore use AndNot below
309-
Vector256<long> masked = Avx2.CompareEqual(Avx2.And(tableVector, mask), mask);
310-
311-
// 1L << offset
312-
Vector256<long> inc = Avx2.ShiftLeftLogicalVariable(Vector256.Create(1L), offsetLong.AsUInt64());
287+
Vector256<long> masked = Avx2.CompareEqual(Avx2.And(Avx2.GatherVector256(tablePtr, blockOffset, 8), mask), mask);
313288

314289
// Mask to zero out non matches (add zero below) - first operand is NOT then AND result (order matters)
315-
inc = Avx2.AndNot(masked, inc);
290+
Vector256<long> inc = Avx2.AndNot(masked, Avx2.ShiftLeftLogicalVariable(Vector256.Create(1L), offsetLong));
316291

317-
Vector256<byte> result = Avx2.CompareEqual(masked.AsByte(), Vector256<byte>.Zero);
318-
bool wasInc = Avx2.MoveMask(result.AsByte()) == unchecked((int)(0b1111_1111_1111_1111_1111_1111_1111_1111));
292+
bool wasInc = Avx2.MoveMask(Avx2.CompareEqual(masked.AsByte(), Vector256<byte>.Zero).AsByte()) == unchecked((int)(0b1111_1111_1111_1111_1111_1111_1111_1111));
319293

320294
tablePtr[blockOffset.GetElement(0)] += inc.GetElement(0);
321295
tablePtr[blockOffset.GetElement(1)] += inc.GetElement(1);

0 commit comments

Comments
 (0)