@@ -1352,6 +1352,8 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
1352
1352
Vector128 < byte > fourthByte = Vector128 . Create ( ( byte ) ( 0b11110000u - 0x80 ) ) ;
1353
1353
Vector128 < byte > v0f = Vector128 . Create ( ( byte ) 0x0F ) ;
1354
1354
Vector128 < byte > v80 = Vector128 . Create ( ( byte ) 0x80 ) ;
1355
+ Vector128 < byte > fourthByteMinusOne = Vector128 . Create ( ( byte ) ( 0b11110000u - 1 ) ) ;
1356
+ Vector128 < sbyte > largestcont = Vector128 . Create ( ( sbyte ) - 65 ) ; // -65 => 0b10111111
1355
1357
// Performance note: we could process 64 bytes at a time for better speed in some cases.
1356
1358
int start_point = processedLength ;
1357
1359
@@ -1362,13 +1364,13 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
1362
1364
{
1363
1365
1364
1366
Vector128 < byte > currentBlock = AdvSimd . LoadVector128 ( pInputBuffer + processedLength ) ;
1365
- if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( AdvSimd . And ( currentBlock , v80 ) ) ) . ToScalar ( ) == 0 )
1367
+ if ( ( currentBlock & v80 ) == Vector128 < byte > . Zero )
1366
1368
// We could also use (AdvSimd.Arm64.MaxAcross(currentBlock).ToScalar() <= 127) but it is slower on some
1367
1369
// hardware.
1368
1370
{
1369
1371
// We have an ASCII block, no need to process it, but
1370
1372
// we need to check if the previous block was incomplete.
1371
- if ( AdvSimd . Arm64 . MaxAcross ( prevIncomplete ) . ToScalar ( ) != 0 )
1373
+ if ( prevIncomplete != Vector128 < byte > . Zero )
1372
1374
{
1373
1375
int off = processedLength >= 3 ? processedLength - 3 : processedLength ;
1374
1376
byte * invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 16 - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
@@ -1402,7 +1404,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
1402
1404
Vector128 < byte > block4 = AdvSimd . LoadVector128 ( pInputBuffer + processedLength + localasciirun + 48 ) ;
1403
1405
Vector128 < byte > or = AdvSimd . Or ( AdvSimd . Or ( block1 , block2 ) , AdvSimd . Or ( block3 , block4 ) ) ;
1404
1406
1405
- if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( AdvSimd . And ( or , v80 ) ) ) . ToScalar ( ) != 0 )
1407
+ if ( ( or & v80 ) != Vector128 < byte > . Zero )
1406
1408
{
1407
1409
break ;
1408
1410
}
@@ -1433,7 +1435,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
1433
1435
// AdvSimd.Arm64.MaxAcross(error) works, but it might be slower
1434
1436
// than AdvSimd.Arm64.MaxAcross(Vector128.AsUInt32(error)) on some
1435
1437
// hardware:
1436
- if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( error ) ) . ToScalar ( ) != 0 )
1438
+ if ( error != Vector128 < byte > . Zero )
1437
1439
{
1438
1440
byte * invalidBytePointer ;
1439
1441
if ( processedLength == 0 )
@@ -1457,18 +1459,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
1457
1459
return invalidBytePointer ;
1458
1460
}
1459
1461
prevIncomplete = AdvSimd . SubtractSaturate ( currentBlock , maxValue ) ;
1460
- Vector128 < sbyte > largestcont = Vector128 . Create ( ( sbyte ) - 65 ) ; // -65 => 0b10111111
1461
1462
contbytes += - AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareLessThanOrEqual ( Vector128 . AsSByte ( currentBlock ) , largestcont ) ) . ToScalar ( ) ;
1462
-
1463
- // computing n4 is more expensive than we would like:
1464
- Vector128 < byte > fourthByteMinusOne = Vector128 . Create ( ( byte ) ( 0b11110000u - 1 ) ) ;
1465
1463
Vector128 < byte > largerthan0f = AdvSimd . CompareGreaterThan ( currentBlock , fourthByteMinusOne ) ;
1466
- byte n4add = ( byte ) AdvSimd . Arm64 . AddAcross ( largerthan0f ) . ToScalar ( ) ;
1467
- int negn4add = ( int ) ( byte ) - n4add ;
1468
- n4 += negn4add ;
1464
+ if ( largerthan0f != Vector128 < byte > . Zero )
1465
+ {
1466
+ byte n4add = ( byte ) AdvSimd . Arm64 . AddAcross ( largerthan0f ) . ToScalar ( ) ;
1467
+ int negn4add = ( int ) ( byte ) - n4add ;
1468
+ n4 += negn4add ;
1469
+ }
1469
1470
}
1470
1471
}
1471
- bool hasIncompete = AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( prevIncomplete ) ) . ToScalar ( ) != 0 ;
1472
+ bool hasIncompete = ( prevIncomplete != Vector128 < byte > . Zero ) ;
1472
1473
if ( processedLength < inputLength || hasIncompete )
1473
1474
{
1474
1475
byte * invalidBytePointer ;
0 commit comments