Skip to content

Commit 2c00cec

Browse files
lntuesivan-shani
authored andcommitted
[libc][math] Add float-only option for atan2f. (llvm#122979)
For targets that have single precision FPU but not double precision FPU such as Cortex M4, only using float-float in the intermediate computations might reduce the code size compared to using double. In this case, when the exact pass is skipped, the float-only option for atan2f implemented in this PR reduces the code size of this function by ~1 KB compared to the double precision version.
1 parent 3f6362c commit 2c00cec

File tree

8 files changed

+321
-56
lines changed

8 files changed

+321
-56
lines changed

libc/src/__support/FPUtil/double_double.h

Lines changed: 54 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -20,41 +20,52 @@ namespace fputil {
2020

2121
#define DEFAULT_DOUBLE_SPLIT 27
2222

23-
using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>;
23+
template <typename T> struct DefaultSplit;
24+
template <> struct DefaultSplit<float> {
25+
static constexpr size_t VALUE = 12;
26+
};
27+
template <> struct DefaultSplit<double> {
28+
static constexpr size_t VALUE = DEFAULT_DOUBLE_SPLIT;
29+
};
30+
31+
using DoubleDouble = NumberPair<double>;
32+
using FloatFloat = NumberPair<float>;
2433

2534
// The output of Dekker's FastTwoSum algorithm is correct, i.e.:
2635
// r.hi + r.lo = a + b exactly
2736
// and |r.lo| < eps(r.lo)
2837
// Assumption: |a| >= |b|, or a = 0.
29-
template <bool FAST2SUM = true>
30-
LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) {
31-
DoubleDouble r{0.0, 0.0};
38+
template <bool FAST2SUM = true, typename T = double>
39+
LIBC_INLINE constexpr NumberPair<T> exact_add(T a, T b) {
40+
NumberPair<T> r{0.0, 0.0};
3241
if constexpr (FAST2SUM) {
3342
r.hi = a + b;
34-
double t = r.hi - a;
43+
T t = r.hi - a;
3544
r.lo = b - t;
3645
} else {
3746
r.hi = a + b;
38-
double t1 = r.hi - a;
39-
double t2 = r.hi - t1;
40-
double t3 = b - t1;
41-
double t4 = a - t2;
47+
T t1 = r.hi - a;
48+
T t2 = r.hi - t1;
49+
T t3 = b - t1;
50+
T t4 = a - t2;
4251
r.lo = t3 + t4;
4352
}
4453
return r;
4554
}
4655

4756
// Assumption: |a.hi| >= |b.hi|
48-
LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a,
49-
const DoubleDouble &b) {
50-
DoubleDouble r = exact_add(a.hi, b.hi);
51-
double lo = a.lo + b.lo;
57+
template <typename T>
58+
LIBC_INLINE constexpr NumberPair<T> add(const NumberPair<T> &a,
59+
const NumberPair<T> &b) {
60+
NumberPair<T> r = exact_add(a.hi, b.hi);
61+
T lo = a.lo + b.lo;
5262
return exact_add(r.hi, r.lo + lo);
5363
}
5464

5565
// Assumption: |a.hi| >= |b|
56-
LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
57-
DoubleDouble r = exact_add<false>(a.hi, b);
66+
template <typename T>
67+
LIBC_INLINE constexpr NumberPair<T> add(const NumberPair<T> &a, T b) {
68+
NumberPair<T> r = exact_add<false>(a.hi, b);
5869
return exact_add(r.hi, r.lo + a.lo);
5970
}
6071

@@ -63,12 +74,12 @@ LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
6374
// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
6475
// Roundings," https://inria.hal.science/hal-04480440.
6576
// Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
66-
template <size_t N = DEFAULT_DOUBLE_SPLIT>
67-
LIBC_INLINE constexpr DoubleDouble split(double a) {
68-
DoubleDouble r{0.0, 0.0};
77+
template <typename T = double, size_t N = DefaultSplit<T>::VALUE>
78+
LIBC_INLINE constexpr NumberPair<T> split(T a) {
79+
NumberPair<T> r{0.0, 0.0};
6980
// CN = 2^N.
70-
constexpr double CN = static_cast<double>(1 << N);
71-
constexpr double C = CN + 1.0;
81+
constexpr T CN = static_cast<T>(1 << N);
82+
constexpr T C = CN + 1.0;
7283
double t1 = C * a;
7384
double t2 = a - t1;
7485
r.hi = t1 + t2;
@@ -77,16 +88,15 @@ LIBC_INLINE constexpr DoubleDouble split(double a) {
7788
}
7889

7990
// Helper for non-fma exact mult where the first number is already split.
80-
template <size_t SPLIT_B = DEFAULT_DOUBLE_SPLIT>
81-
LIBC_INLINE DoubleDouble exact_mult(const DoubleDouble &as, double a,
82-
double b) {
83-
DoubleDouble bs = split<SPLIT_B>(b);
84-
DoubleDouble r{0.0, 0.0};
91+
template <typename T = double, size_t SPLIT_B = DefaultSplit<T>::VALUE>
92+
LIBC_INLINE NumberPair<T> exact_mult(const NumberPair<T> &as, T a, T b) {
93+
NumberPair<T> bs = split<T, SPLIT_B>(b);
94+
NumberPair<T> r{0.0, 0.0};
8595

8696
r.hi = a * b;
87-
double t1 = as.hi * bs.hi - r.hi;
88-
double t2 = as.hi * bs.lo + t1;
89-
double t3 = as.lo * bs.hi + t2;
97+
T t1 = as.hi * bs.hi - r.hi;
98+
T t2 = as.hi * bs.lo + t1;
99+
T t3 = as.lo * bs.hi + t2;
90100
r.lo = as.lo * bs.lo + t3;
91101

92102
return r;
@@ -99,18 +109,18 @@ LIBC_INLINE DoubleDouble exact_mult(const DoubleDouble &as, double a,
99109
// Using Theorem 1 in the paper above, without FMA instruction, if we restrict
100110
// the generated constants to precision <= 51, and splitting it by 2^28 + 1,
101111
// then a * b = r.hi + r.lo is exact for all rounding modes.
102-
template <size_t SPLIT_B = 27>
103-
LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
104-
DoubleDouble r{0.0, 0.0};
112+
template <typename T = double, size_t SPLIT_B = DefaultSplit<T>::VALUE>
113+
LIBC_INLINE NumberPair<T> exact_mult(T a, T b) {
114+
NumberPair<T> r{0.0, 0.0};
105115

106116
#ifdef LIBC_TARGET_CPU_HAS_FMA
107117
r.hi = a * b;
108118
r.lo = fputil::multiply_add(a, b, -r.hi);
109119
#else
110120
// Dekker's Product.
111-
DoubleDouble as = split(a);
121+
NumberPair<T> as = split(a);
112122

113-
r = exact_mult<SPLIT_B>(as, a, b);
123+
r = exact_mult<T, SPLIT_B>(as, a, b);
114124
#endif // LIBC_TARGET_CPU_HAS_FMA
115125

116126
return r;
@@ -125,7 +135,7 @@ LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
125135
template <size_t SPLIT_B = 27>
126136
LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
127137
const DoubleDouble &b) {
128-
DoubleDouble r = exact_mult<SPLIT_B>(a.hi, b.hi);
138+
DoubleDouble r = exact_mult<double, SPLIT_B>(a.hi, b.hi);
129139
double t1 = multiply_add(a.hi, b.lo, r.lo);
130140
double t2 = multiply_add(a.lo, b.hi, t1);
131141
r.lo = t2;
@@ -157,19 +167,20 @@ LIBC_INLINE DoubleDouble multiply_add<DoubleDouble>(const DoubleDouble &a,
157167
// rl = q * (ah - bh * rh) + q * (al - bl * rh)
158168
// as accurate as possible, then the error is bounded by:
159169
// |(ah + al) / (bh + bl) - (rh + rl)| < O(bl/bh) * (2^-52 + al/ah + bl/bh)
160-
LIBC_INLINE DoubleDouble div(const DoubleDouble &a, const DoubleDouble &b) {
161-
DoubleDouble r;
162-
double q = 1.0 / b.hi;
170+
template <typename T>
171+
LIBC_INLINE NumberPair<T> div(const NumberPair<T> &a, const NumberPair<T> &b) {
172+
NumberPair<T> r;
173+
T q = T(1) / b.hi;
163174
r.hi = a.hi * q;
164175

165176
#ifdef LIBC_TARGET_CPU_HAS_FMA
166-
double e_hi = fputil::multiply_add(b.hi, -r.hi, a.hi);
167-
double e_lo = fputil::multiply_add(b.lo, -r.hi, a.lo);
177+
T e_hi = fputil::multiply_add(b.hi, -r.hi, a.hi);
178+
T e_lo = fputil::multiply_add(b.lo, -r.hi, a.lo);
168179
#else
169-
DoubleDouble b_hi_r_hi = fputil::exact_mult(b.hi, -r.hi);
170-
DoubleDouble b_lo_r_hi = fputil::exact_mult(b.lo, -r.hi);
171-
double e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
172-
double e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
180+
NumberPair<T> b_hi_r_hi = fputil::exact_mult(b.hi, -r.hi);
181+
NumberPair<T> b_lo_r_hi = fputil::exact_mult(b.lo, -r.hi);
182+
T e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
183+
T e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
173184
#endif // LIBC_TARGET_CPU_HAS_FMA
174185

175186
r.lo = q * (e_hi + e_lo);

libc/src/__support/macros/optimization.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
4545
#define LIBC_MATH_FAST \
4646
(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | \
4747
LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)
48+
#define LIBC_MATH_INTERMEDIATE_COMP_IN_FLOAT 0x10
4849

4950
#ifndef LIBC_MATH
5051
#define LIBC_MATH 0
@@ -58,4 +59,8 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
5859
#define LIBC_MATH_HAS_SMALL_TABLES
5960
#endif
6061

62+
#if (LIBC_MATH & LIBC_MATH_INTERMEDIATE_COMP_IN_FLOAT)
63+
#define LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT
64+
#endif
65+
6166
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4052,8 +4052,10 @@ add_entrypoint_object(
40524052
atan2f.cpp
40534053
HDRS
40544054
../atan2f.h
4055+
atan2f_float.h
40554056
DEPENDS
40564057
.inv_trigf_utils
4058+
libc.src.__support.FPUtil.double_double
40574059
libc.src.__support.FPUtil.fp_bits
40584060
libc.src.__support.FPUtil.multiply_add
40594061
libc.src.__support.FPUtil.nearest_integer

libc/src/math/generic/atan2f.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@
1717
#include "src/__support/macros/config.h"
1818
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
1919

20+
#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \
21+
defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT)
22+
23+
// We use float-float implementation to reduce size.
24+
#include "src/math/generic/atan2f_float.h"
25+
26+
#else
27+
2028
namespace LIBC_NAMESPACE_DECL {
2129

2230
namespace {
@@ -324,3 +332,5 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
324332
}
325333

326334
} // namespace LIBC_NAMESPACE_DECL
335+
336+
#endif

0 commit comments

Comments
 (0)