@@ -494,32 +494,32 @@ private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnl
494
494
var f2 = Vector256 . Create ( 2f ) ;
495
495
var f025 = Vector256 . Create ( 0.25f ) ;
496
496
Vector256 < int > switchInnerDoubleWords = Unsafe . As < byte , Vector256 < int > > ( ref MemoryMarshal . GetReference ( SimdUtils . HwIntrinsics . PermuteMaskSwitchInnerDWords8x32 ) ) ;
497
-
498
- ref Vector256 < float > in1 = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref MemoryMarshal . GetReference ( source ) ) ;
499
- ref Vector256 < float > in2 = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref Unsafe . Add ( ref MemoryMarshal . GetReference ( source ) , 1 ) ) ;
500
497
ref Vector256 < float > destRef = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref destination ) ;
501
498
502
- for ( int i = 0 ; i < 8 ; i ++ )
499
+ for ( int i = 0 ; i < 2 ; i ++ )
503
500
{
504
- Vector256 < float > a = in1 ;
505
- Vector256 < float > b = Unsafe . Add ( ref in1 , 1 ) ;
506
- Vector256 < float > c = in2 ;
507
- Vector256 < float > d = Unsafe . Add ( ref in2 , 1 ) ;
508
-
509
- Vector256 < float > calc1 = Avx . Shuffle ( a , c , 0b10_00_10_00 ) ;
510
- Vector256 < float > calc2 = Avx . Shuffle ( a , c , 0b11_01_11_01 ) ;
511
- Vector256 < float > calc3 = Avx . Shuffle ( b , d , 0b10_00_10_00 ) ;
512
- Vector256 < float > calc4 = Avx . Shuffle ( b , d , 0b11_01_11_01 ) ;
513
-
514
- Vector256 < float > sum = Avx . Add ( Avx . Add ( calc1 , calc2 ) , Avx . Add ( calc3 , calc4 ) ) ;
515
- Vector256 < float > add = Avx . Add ( sum , f2 ) ;
516
- Vector256 < float > res = Avx . Multiply ( add , f025 ) ;
501
+ ref Vector256 < float > in1 = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref Unsafe . Add ( ref MemoryMarshal . GetReference ( source ) , 2 * i ) ) ;
502
+ ref Vector256 < float > in2 = ref Unsafe . As < Block8x8F , Vector256 < float > > ( ref Unsafe . Add ( ref MemoryMarshal . GetReference ( source ) , ( 2 * i ) + 1 ) ) ;
517
503
518
- destRef = Avx2 . PermuteVar8x32 ( res , switchInnerDoubleWords ) ;
519
- destRef = ref Unsafe . Add ( ref destRef , 1 ) ;
520
-
521
- in1 = ref Unsafe . Add ( ref in1 , 2 ) ;
522
- in2 = ref Unsafe . Add ( ref in2 , 2 ) ;
504
+ for ( int j = 0 ; j < 8 ; j += 2 )
505
+ {
506
+ Vector256 < float > a = Unsafe . Add ( ref in1 , j ) ;
507
+ Vector256 < float > b = Unsafe . Add ( ref in1 , j + 1 ) ;
508
+ Vector256 < float > c = Unsafe . Add ( ref in2 , j ) ;
509
+ Vector256 < float > d = Unsafe . Add ( ref in2 , j + 1 ) ;
510
+
511
+ Vector256 < float > calc1 = Avx . Shuffle ( a , c , 0b10_00_10_00 ) ;
512
+ Vector256 < float > calc2 = Avx . Shuffle ( a , c , 0b11_01_11_01 ) ;
513
+ Vector256 < float > calc3 = Avx . Shuffle ( b , d , 0b10_00_10_00 ) ;
514
+ Vector256 < float > calc4 = Avx . Shuffle ( b , d , 0b11_01_11_01 ) ;
515
+
516
+ Vector256 < float > sum = Avx . Add ( Avx . Add ( calc1 , calc2 ) , Avx . Add ( calc3 , calc4 ) ) ;
517
+ Vector256 < float > add = Avx . Add ( sum , f2 ) ;
518
+ Vector256 < float > res = Avx . Multiply ( add , f025 ) ;
519
+
520
+ destRef = Avx2 . PermuteVar8x32 ( res , switchInnerDoubleWords ) ;
521
+ destRef = ref Unsafe . Add ( ref destRef , 1 ) ;
522
+ }
523
523
}
524
524
#endif
525
525
}
0 commit comments