@@ -1470,7 +1470,7 @@ library Lib512MathArithmetic {
14701470 /// normalization means our mantissa is geometrically symmetric around 1, leading to 16
14711471 /// buckets on the low side and 32 buckets on the high side.
14721472 // `Y` _ultimately_ approximates the inverse square root of fixnum `M` as a
1473- // Q1.255 . However, as a gas optimization, the number of fractional bits in `Y` rises
1473+ // Q3.253 . However, as a gas optimization, the number of fractional bits in `Y` rises
14741474 // through the steps, giving an inhomogeneous fixed-point representation. Y ≈∈ [√½, √2]
14751475 uint256 Y; // scale: 2⁽²⁵⁵⁺ᵉ⁾
14761476 assembly ("memory-safe" ) {
@@ -1505,7 +1505,7 @@ library Lib512MathArithmetic {
15051505 // The Newton-Raphson iteration for 1/√M is:
15061506 // Y ≈ Y · (3 - M · Y²) / 2
15071507 // The implementation of this iteration is deliberately imprecise. No matter how many
1508- // times you run it, you won't converge `Y` on the closest Q1.255 to √M. However, this
1508+ // times you run it, you won't converge `Y` on the closest Q3.253 to √M. However, this
15091509 // is acceptable because the cleanup step applied after the final call is very tolerant
15101510 // of error in the low bits of `Y`.
15111511
@@ -1550,23 +1550,22 @@ library Lib512MathArithmetic {
15501550 uint256 MY2 = _inaccurateMulHi (M, Y2); // scale: 2²⁵⁴
15511551 uint256 T = 1.5 * 2 ** 254 - MY2; // scale: 2²⁵⁴
15521552 Y = _inaccurateMulHi (Y << 128 , T); // scale: 2²⁵³
1553- Y <<= 2 ; // scale: 2²⁵⁵ (Q1.255 format; effectively Q1.253)
15541553 }
1555- // `Y` is Q1.255
1554+ // `Y` is Q3.253
15561555
15571556 /// When we combine `Y` with `M` to form our approximation of the square root, we have
15581557 /// to un-normalize by the half-scale value. This is where even-exponent normalization
15591558 /// comes in because the half-scale is integral.
15601559 /// M = ⌊x · 2⁽²⁵⁵⁻²ᵉ⁾⌋
1561- /// Y ≈ 2²⁵⁵ / √(M / 2²⁵⁵)
1562- /// Y ≈ 2³⁸³ / √(2·M)
1563- /// M·Y ≈ 2³⁸³ · √(M/2)
1564- /// M·Y ≈ 2⁽⁵¹⁰ ⁻ᵉ⁾ · √x
1565- /// r0 ≈ M·Y / 2⁽⁵¹⁰ ⁻ᵉ⁾ ≈ ⌊√x⌋
1566- // We shift right by `510 - e` to account for both the Q1.255 scaling and
1560+ /// Y ≈ 2²⁵³ / √(M / 2²⁵⁵)
1561+ /// Y ≈ 2³⁸¹ / √(2·M)
1562+ /// M·Y ≈ 2³⁸¹ · √(M/2)
1563+ /// M·Y ≈ 2⁽⁵⁰⁸ ⁻ᵉ⁾ · √x
1564+ /// r0 ≈ M·Y / 2⁽⁵⁰⁸ ⁻ᵉ⁾ ≈ ⌊√x⌋
1565+ // We shift right by `508 - e` to account for both the Q3.253 scaling and
15671566 // denormalization. We don't care about accuracy in the low bits of `r0`, so we can cut
15681567 // some corners.
1569- (, uint256 r0 ) = _shr (_inaccurateMulHi (M, Y), 0 , 254 + invE);
1568+ (, uint256 r0 ) = _shr (_inaccurateMulHi (M, Y), 0 , 252 + invE);
15701569
15711570 /// `r0` is only an approximation of √x, so we perform a single Babylonian step to fully
15721571 /// converge on ⌊√x⌋ or ⌈√x⌉. The Babylonian step is:
0 commit comments