1
1
//! Extension trait for [`f32`] and [`f64`], providing high level wrappers on top of
2
2
//! raw libdevice intrinsics from [`intrinsics`](crate::intrinsics).
3
3
4
+ use cuda_std_macros:: gpu_only;
5
+
6
+ #[ cfg( target_arch = "nvptx64" ) ]
4
7
use crate :: intrinsics as raw;
5
8
6
9
// allows us to add new functions to the trait at any time without needing a new major version.
@@ -71,26 +74,32 @@ pub trait FloatExt: Sized + private::Sealed {
71
74
}
72
75
73
76
impl FloatExt for f64 {
77
+ #[ gpu_only]
74
78
fn cospi ( self ) -> Self {
75
79
unsafe { raw:: cospi ( self ) }
76
80
}
77
81
82
+ #[ gpu_only]
78
83
fn error_function ( self ) -> Self {
79
84
unsafe { raw:: erf ( self ) }
80
85
}
81
86
87
+ #[ gpu_only]
82
88
fn complementary_error_function ( self ) -> Self {
83
89
unsafe { raw:: erfc ( self ) }
84
90
}
85
91
92
+ #[ gpu_only]
86
93
fn inv_complementary_error_function ( self ) -> Self {
87
94
unsafe { raw:: erfcinv ( self ) }
88
95
}
89
96
97
+ #[ gpu_only]
90
98
fn scaled_complementary_error_function ( self ) -> Self {
91
99
unsafe { raw:: erfcx ( self ) }
92
100
}
93
101
102
+ #[ gpu_only]
94
103
fn frexp ( self ) -> ( Self , i32 ) {
95
104
let mut exp = 0 ;
96
105
unsafe {
@@ -99,55 +108,68 @@ impl FloatExt for f64 {
99
108
}
100
109
}
101
110
111
+ #[ gpu_only]
102
112
fn unbiased_exp ( self ) -> i32 {
103
113
unsafe { raw:: ilogb ( self ) }
104
114
}
105
115
116
+ #[ gpu_only]
106
117
fn j0 ( self ) -> Self {
107
118
unsafe { raw:: j0 ( self ) }
108
119
}
109
120
121
+ #[ gpu_only]
110
122
fn j1 ( self ) -> Self {
111
123
unsafe { raw:: j1 ( self ) }
112
124
}
113
125
126
+ #[ gpu_only]
114
127
fn jn ( self , order : i32 ) -> Self {
115
128
unsafe { raw:: jn ( order, self ) }
116
129
}
117
130
131
+ #[ gpu_only]
118
132
fn ldexp ( self , exp : i32 ) -> Self {
119
133
unsafe { raw:: ldexp ( self , exp) }
120
134
}
121
135
136
+ #[ gpu_only]
122
137
fn log_gamma ( self ) -> Self {
123
138
unsafe { raw:: lgamma ( self ) }
124
139
}
125
140
141
+ #[ gpu_only]
126
142
fn log1p ( self ) -> Self {
127
143
unsafe { raw:: log1p ( self ) }
128
144
}
129
145
146
+ #[ gpu_only]
130
147
fn norm_cdf ( self ) -> Self {
131
148
unsafe { raw:: normcdf ( self ) }
132
149
}
133
150
151
+ #[ gpu_only]
134
152
fn inv_norm_cdf ( self ) -> Self {
135
153
unsafe { raw:: normcdfinv ( self ) }
136
154
}
137
155
156
+ #[ gpu_only]
138
157
fn rcbrt ( self ) -> Self {
139
158
unsafe { raw:: rcbrt ( self ) }
140
159
}
141
160
161
+ #[ gpu_only]
142
162
fn saturate ( self ) -> Self {
143
163
// this intrinsic doesnt actually exit on f64, so implement it as clamp on f64
144
164
self . clamp ( 0.0 , 1.0 )
145
165
}
146
166
167
+ #[ gpu_only]
147
168
fn scale_by_n ( self , exp : i32 ) -> Self {
148
169
unsafe { raw:: scalbn ( self , exp) }
149
170
}
150
171
172
+ #[ gpu_only]
151
173
fn sincospi ( self ) -> ( Self , Self ) {
152
174
let mut sin = 0.0 ;
153
175
let mut cos = 0.0 ;
@@ -157,48 +179,59 @@ impl FloatExt for f64 {
157
179
( sin, cos)
158
180
}
159
181
182
+ #[ gpu_only]
160
183
fn sinpi ( self ) -> Self {
161
184
unsafe { raw:: sinpi ( self ) }
162
185
}
163
186
187
+ #[ gpu_only]
164
188
fn gamma ( self ) -> Self {
165
189
unsafe { raw:: tgamma ( self ) }
166
190
}
167
191
192
+ #[ gpu_only]
168
193
fn y0 ( self ) -> Self {
169
194
unsafe { raw:: y0 ( self ) }
170
195
}
171
196
197
+ #[ gpu_only]
172
198
fn y1 ( self ) -> Self {
173
199
unsafe { raw:: y1 ( self ) }
174
200
}
175
201
202
+ #[ gpu_only]
176
203
fn yn ( self , order : i32 ) -> Self {
177
204
unsafe { raw:: yn ( order, self ) }
178
205
}
179
206
}
180
207
181
208
impl FloatExt for f32 {
209
+ #[ gpu_only]
182
210
fn cospi ( self ) -> Self {
183
211
unsafe { raw:: cospif ( self ) }
184
212
}
185
213
214
+ #[ gpu_only]
186
215
fn error_function ( self ) -> Self {
187
216
unsafe { raw:: erff ( self ) }
188
217
}
189
218
219
+ #[ gpu_only]
190
220
fn complementary_error_function ( self ) -> Self {
191
221
unsafe { raw:: erfcf ( self ) }
192
222
}
193
223
224
+ #[ gpu_only]
194
225
fn inv_complementary_error_function ( self ) -> Self {
195
226
unsafe { raw:: erfcinvf ( self ) }
196
227
}
197
228
229
+ #[ gpu_only]
198
230
fn scaled_complementary_error_function ( self ) -> Self {
199
231
unsafe { raw:: erfcxf ( self ) }
200
232
}
201
233
234
+ #[ gpu_only]
202
235
fn frexp ( self ) -> ( Self , i32 ) {
203
236
let mut exp = 0 ;
204
237
unsafe {
@@ -207,54 +240,67 @@ impl FloatExt for f32 {
207
240
}
208
241
}
209
242
243
+ #[ gpu_only]
210
244
fn unbiased_exp ( self ) -> i32 {
211
245
unsafe { raw:: ilogbf ( self ) }
212
246
}
213
247
248
+ #[ gpu_only]
214
249
fn j0 ( self ) -> Self {
215
250
unsafe { raw:: j0f ( self ) }
216
251
}
217
252
253
+ #[ gpu_only]
218
254
fn j1 ( self ) -> Self {
219
255
unsafe { raw:: j1f ( self ) }
220
256
}
221
257
258
+ #[ gpu_only]
222
259
fn jn ( self , order : i32 ) -> Self {
223
260
unsafe { raw:: jnf ( order, self ) }
224
261
}
225
262
263
+ #[ gpu_only]
226
264
fn ldexp ( self , exp : i32 ) -> Self {
227
265
unsafe { raw:: ldexpf ( self , exp) }
228
266
}
229
267
268
+ #[ gpu_only]
230
269
fn log_gamma ( self ) -> Self {
231
270
unsafe { raw:: lgammaf ( self ) }
232
271
}
233
272
273
+ #[ gpu_only]
234
274
fn log1p ( self ) -> Self {
235
275
unsafe { raw:: log1pf ( self ) }
236
276
}
237
277
278
+ #[ gpu_only]
238
279
fn norm_cdf ( self ) -> Self {
239
280
unsafe { raw:: normcdff ( self ) }
240
281
}
241
282
283
+ #[ gpu_only]
242
284
fn inv_norm_cdf ( self ) -> Self {
243
285
unsafe { raw:: normcdfinvf ( self ) }
244
286
}
245
287
288
+ #[ gpu_only]
246
289
fn rcbrt ( self ) -> Self {
247
290
unsafe { raw:: rcbrtf ( self ) }
248
291
}
249
292
293
+ #[ gpu_only]
250
294
fn saturate ( self ) -> Self {
251
295
unsafe { raw:: saturatef ( self ) }
252
296
}
253
297
298
+ #[ gpu_only]
254
299
fn scale_by_n ( self , exp : i32 ) -> Self {
255
300
unsafe { raw:: scalbnf ( self , exp) }
256
301
}
257
302
303
+ #[ gpu_only]
258
304
fn sincospi ( self ) -> ( Self , Self ) {
259
305
let mut sin = 0.0 ;
260
306
let mut cos = 0.0 ;
@@ -264,22 +310,27 @@ impl FloatExt for f32 {
264
310
( sin, cos)
265
311
}
266
312
313
+ #[ gpu_only]
267
314
fn sinpi ( self ) -> Self {
268
315
unsafe { raw:: sinpif ( self ) }
269
316
}
270
317
318
+ #[ gpu_only]
271
319
fn gamma ( self ) -> Self {
272
320
unsafe { raw:: tgammaf ( self ) }
273
321
}
274
322
323
+ #[ gpu_only]
275
324
fn y0 ( self ) -> Self {
276
325
unsafe { raw:: y0f ( self ) }
277
326
}
278
327
328
+ #[ gpu_only]
279
329
fn y1 ( self ) -> Self {
280
330
unsafe { raw:: y1f ( self ) }
281
331
}
282
332
333
+ #[ gpu_only]
283
334
fn yn ( self , order : i32 ) -> Self {
284
335
unsafe { raw:: ynf ( order, self ) }
285
336
}
0 commit comments