1
1
using System ;
2
2
using System . Collections . Generic ;
3
3
using System . Diagnostics . CodeAnalysis ;
4
+ using System . Runtime . CompilerServices ;
5
+
4
6
5
7
6
8
#if ! NETSTANDARD2_0
@@ -172,6 +174,7 @@ private unsafe void IncrementStd(T value)
172
174
}
173
175
174
176
// Applies another round of hashing for additional randomization
177
+ //[MethodImpl(MethodImplOptions.AggressiveInlining)]
175
178
private static int Rehash ( int x )
176
179
{
177
180
x = ( int ) ( x * 0x31848bab ) ;
@@ -180,6 +183,7 @@ private static int Rehash(int x)
180
183
}
181
184
182
185
// Applies a supplemental hash function to defend against poor-quality hashes.
186
+ //[MethodImpl(MethodImplOptions.AggressiveInlining)]
183
187
private static int Spread ( int x )
184
188
{
185
189
x ^= ( int ) ( ( uint ) x >> 17 ) ;
@@ -231,40 +235,28 @@ private void Reset()
231
235
}
232
236
233
237
#if ! NETSTANDARD2_0
238
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
239
+ //[MethodImpl((MethodImplOptions)512)]
234
240
private unsafe int EstimateFrequencyAvx ( T value )
235
241
{
236
242
int blockHash = Spread ( comparer . GetHashCode ( value ) ) ;
237
243
int counterHash = Rehash ( blockHash ) ;
238
244
int block = ( blockHash & blockMask ) << 3 ;
239
245
240
- Vector128 < int > h = Vector128 . Create ( counterHash ) ;
241
- h = Avx2 . ShiftRightLogicalVariable ( h . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
246
+ Vector128 < int > h = Avx2 . ShiftRightLogicalVariable ( Vector128 . Create ( counterHash ) . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
247
+ Vector128 < int > index = Avx2 . ShiftLeftLogical ( Avx2 . And ( Avx2 . ShiftRightLogical ( h , 1 ) , Vector128 . Create ( 15 ) ) , 2 ) ;
248
+ Vector128 < int > blockOffset = Avx2 . Add ( Avx2 . Add ( Vector128 . Create ( block ) , Avx2 . And ( h , Vector128 . Create ( 1 ) ) ) , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ;
242
249
243
- var index = Avx2 . ShiftRightLogical ( h , 1 ) ;
244
- index = Avx2 . And ( index , Vector128 . Create ( 15 ) ) ; // j - counter index
245
- Vector128 < int > offset = Avx2 . And ( h , Vector128 . Create ( 1 ) ) ;
246
- Vector128 < int > blockOffset = Avx2 . Add ( Vector128 . Create ( block ) , offset ) ; // i - table index
247
- blockOffset = Avx2 . Add ( blockOffset , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ; // + (i << 1)
250
+ Vector256 < ulong > indexLong = Avx2 . PermuteVar8x32 ( Vector256 . Create ( index , Vector128 < int > . Zero ) , Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ) . AsUInt64 ( ) ;
248
251
249
252
fixed ( long * tablePtr = table )
250
253
{
251
- Vector256 < long > tableVector = Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) ;
252
- index = Avx2 . ShiftLeftLogical ( index , 2 ) ;
253
-
254
- // convert index from int to long via permute
255
- Vector256 < long > indexLong = Vector256 . Create ( index , Vector128 < int > . Zero ) . AsInt64 ( ) ;
256
- Vector256 < int > permuteMask2 = Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ;
257
- indexLong = Avx2 . PermuteVar8x32 ( indexLong . AsInt32 ( ) , permuteMask2 ) . AsInt64 ( ) ;
258
- tableVector = Avx2 . ShiftRightLogicalVariable ( tableVector , indexLong . AsUInt64 ( ) ) ;
259
- tableVector = Avx2 . And ( tableVector , Vector256 . Create ( 0xfL ) ) ;
260
-
261
- Vector256 < int > permuteMask = Vector256 . Create ( 0 , 2 , 4 , 6 , 1 , 3 , 5 , 7 ) ;
262
- Vector128 < ushort > count = Avx2 . PermuteVar8x32 ( tableVector . AsInt32 ( ) , permuteMask )
254
+ Vector128 < ushort > count = Avx2 . PermuteVar8x32 ( Avx2 . And ( Avx2 . ShiftRightLogicalVariable ( Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) , indexLong ) , Vector256 . Create ( 0xfL ) ) . AsInt32 ( ) , Vector256 . Create ( 0 , 2 , 4 , 6 , 1 , 3 , 5 , 7 ) )
263
255
. GetLower ( )
264
256
. AsUInt16 ( ) ;
265
257
266
258
// set the zeroed high parts of the long value to ushort.Max
267
- #if NET6_0
259
+ #if NET6_0_OR_GREATER
268
260
count = Avx2 . Blend ( count , Vector128 < ushort > . AllBitsSet , 0b10101010 ) ;
269
261
#else
270
262
count = Avx2 . Blend ( count , Vector128 . Create ( ushort . MaxValue ) , 0b10101010 ) ;
@@ -274,48 +266,30 @@ private unsafe int EstimateFrequencyAvx(T value)
274
266
}
275
267
}
276
268
269
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
270
+ //[MethodImpl((MethodImplOptions)512)]
277
271
private unsafe void IncrementAvx ( T value )
278
272
{
279
273
int blockHash = Spread ( comparer . GetHashCode ( value ) ) ;
280
274
int counterHash = Rehash ( blockHash ) ;
281
275
int block = ( blockHash & blockMask ) << 3 ;
282
276
283
- Vector128 < int > h = Vector128 . Create ( counterHash ) ;
284
- h = Avx2 . ShiftRightLogicalVariable ( h . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
277
+ Vector128 < int > h = Avx2 . ShiftRightLogicalVariable ( Vector128 . Create ( counterHash ) . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
278
+ Vector128 < int > index = Avx2 . ShiftLeftLogical ( Avx2 . And ( Avx2 . ShiftRightLogical ( h , 1 ) , Vector128 . Create ( 15 ) ) , 2 ) ;
279
+ Vector128 < int > blockOffset = Avx2 . Add ( Avx2 . Add ( Vector128 . Create ( block ) , Avx2 . And ( h , Vector128 . Create ( 1 ) ) ) , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ;
285
280
286
- Vector128 < int > index = Avx2 . ShiftRightLogical ( h , 1 ) ;
287
- index = Avx2 . And ( index , Vector128 . Create ( 15 ) ) ; // j - counter index
288
- Vector128 < int > offset = Avx2 . And ( h , Vector128 . Create ( 1 ) ) ;
289
- Vector128 < int > blockOffset = Avx2 . Add ( Vector128 . Create ( block ) , offset ) ; // i - table index
290
- blockOffset = Avx2 . Add ( blockOffset , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ; // + (i << 1)
281
+ Vector256 < ulong > offsetLong = Avx2 . PermuteVar8x32 ( Vector256 . Create ( index , Vector128 < int > . Zero ) , Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ) . AsUInt64 ( ) ;
282
+ Vector256 < long > mask = Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 0xfL ) , offsetLong ) ;
291
283
292
284
fixed ( long * tablePtr = table )
293
285
{
294
- Vector256 < long > tableVector = Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) ;
295
-
296
- // j == index
297
- index = Avx2 . ShiftLeftLogical ( index , 2 ) ;
298
- Vector256 < long > offsetLong = Vector256 . Create ( index , Vector128 < int > . Zero ) . AsInt64 ( ) ;
299
-
300
- Vector256 < int > permuteMask = Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ;
301
- offsetLong = Avx2 . PermuteVar8x32 ( offsetLong . AsInt32 ( ) , permuteMask ) . AsInt64 ( ) ;
302
-
303
- // mask = (0xfL << offset)
304
- Vector256 < long > fifteen = Vector256 . Create ( 0xfL ) ;
305
- Vector256 < long > mask = Avx2 . ShiftLeftLogicalVariable ( fifteen , offsetLong . AsUInt64 ( ) ) ;
306
-
307
- // (table[i] & mask) != mask)
308
286
// Note masked is 'equal' - therefore use AndNot below
309
- Vector256 < long > masked = Avx2 . CompareEqual ( Avx2 . And ( tableVector , mask ) , mask ) ;
310
-
311
- // 1L << offset
312
- Vector256 < long > inc = Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 1L ) , offsetLong . AsUInt64 ( ) ) ;
287
+ Vector256 < long > masked = Avx2 . CompareEqual ( Avx2 . And ( Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) , mask ) , mask ) ;
313
288
314
289
// Mask to zero out non matches (add zero below) - first operand is NOT then AND result (order matters)
315
- inc = Avx2 . AndNot ( masked , inc ) ;
290
+ Vector256 < long > inc = Avx2 . AndNot ( masked , Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 1L ) , offsetLong ) ) ;
316
291
317
- Vector256 < byte > result = Avx2 . CompareEqual ( masked . AsByte ( ) , Vector256 < byte > . Zero ) ;
318
- bool wasInc = Avx2 . MoveMask ( result . AsByte ( ) ) == unchecked ( ( int ) ( 0b1111_1111_1111_1111_1111_1111_1111_1111 ) ) ;
292
+ bool wasInc = Avx2 . MoveMask ( Avx2 . CompareEqual ( masked . AsByte ( ) , Vector256 < byte > . Zero ) . AsByte ( ) ) == unchecked ( ( int ) ( 0b1111_1111_1111_1111_1111_1111_1111_1111 ) ) ;
319
293
320
294
tablePtr [ blockOffset . GetElement ( 0 ) ] += inc . GetElement ( 0 ) ;
321
295
tablePtr [ blockOffset . GetElement ( 1 ) ] += inc . GetElement ( 1 ) ;
0 commit comments