@@ -422,7 +422,7 @@ where
         // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
         // expected to be strictly positive because b_UQ1_hw has its highest bit set
         // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
-        let corr_uq1_hw: HalfRep<F> = zero_hw.wrapping_sub(x_uq0_hw.widen_mul(b_uq1_hw).hi());
+        // let corr_uq1_hw: HalfRep<F> = zero_hw.wrapping_sub(x_uq0_hw.widen_mul(b_uq1_hw).hi());
 
         // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
         // obtaining an UQ1.(HW-1) number and proving its highest bit could be
@@ -436,7 +436,7 @@ where
         // The fact corr_UQ1_hw was virtually round up (due to result of
         // multiplication being **first** truncated, then negated - to improve
         // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
-        x_uq0_hw = (x_uq0_hw.widen_mul(corr_uq1_hw) >> (hw - 1)).cast();
+        // x_uq0_hw = (x_uq0_hw.widen_mul(corr_uq1_hw) >> (hw - 1)).cast();
 
         // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
         // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
@@ -453,6 +453,8 @@ where
         //         = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw
         //                       \------ >0 -------/   \-- >0 ---/
         // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U)
+
+        x_uq0_hw = iter_once(x_uq0_hw, b_uq1_hw);
     }
 
     // For initial half-width iterations, U = 2^-HW
@@ -527,8 +529,7 @@ where
     };
 
     for _ in 0..full_iterations {
-        let corr_uq1: F::Int = zero.wrapping_sub(x_uq0.widen_mul(b_uq1).hi());
-        x_uq0 = (x_uq0.widen_mul(corr_uq1) >> (F::BITS - 1)).lo();
+        x_uq0 = iter_once(x_uq0, b_uq1);
     }
 
     // Finally, account for possible overflow, as explained above.
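
For readers tracing the refactor: the two statements removed from the full-width loop are exactly the step a single iter_once call now performs. Below is a minimal sketch of that step specialized to u32, assuming the UQ0.32/UQ1.31 layouts described in the comments. The name iter_once_u32 is hypothetical, for illustration only; the PR's actual iter_once is generic over the float's integer type and uses widen_mul/hi/lo rather than casts.

// Sketch only (not code from this commit): one Newton-Raphson step
// x_{n+1} = x_n * (2 - x_n * b) in fixed point, mirroring the removed
// inline code. `iter_once_u32` is a hypothetical u32 specialization.
fn iter_once_u32(x_uq0: u32, b_uq1: u32) -> u32 {
    // corr ~= 2 - x*b in UQ1.31: take the high half of the widening
    // product x*b (this truncates), then negate mod 2^32 - so corr is
    // effectively rounded up, matching the error analysis above.
    let corr_uq1 = 0u32.wrapping_sub(((x_uq0 as u64 * b_uq1 as u64) >> 32) as u32);
    // x * corr is a UQ1.63 product; shifting right by 31 bits and
    // truncating to u32 returns the result to the UQ0.32 format.
    ((x_uq0 as u64 * corr_uq1 as u64) >> 31) as u32
}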
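
A quick convergence check under the same assumptions (again a hypothetical demo, not part of the commit): with b = 1.5 stored as UQ1.31 and an initial guess of 0.75 in UQ0.32, a few iterations should land near 1/1.5, i.e. about 0xAAAA_AAAA in UQ0.32.

fn main() {
    let b_uq1: u32 = 0xC000_0000; // 1.5 in UQ1.31
    let mut x_uq0: u32 = 0xC000_0000; // initial guess 0.75 in UQ0.32
    for _ in 0..4 {
        x_uq0 = iter_once_u32(x_uq0, b_uq1);
    }
    // Converges toward 1/1.5 = 0.666..., roughly 0xAAAA_AAAA in UQ0.32
    // (the last few bits may differ because each step truncates).
    println!("{x_uq0:#010X}");
}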