1
1
//! Extension trait for [`f32`] and [`f64`], providing high level wrappers on top of
2
2
//! raw libdevice intrinsics from [`intrinsics`](crate::intrinsics).
3
3
4
- use crate :: intrinsics as raw ;
4
+ use cuda_std_macros :: gpu_only ;
5
5
6
6
// allows us to add new functions to the trait at any time without needing a new major version.
7
7
mod private {
@@ -71,26 +71,32 @@ pub trait FloatExt: Sized + private::Sealed {
71
71
}
72
72
73
73
impl FloatExt for f64 {
74
+ #[ gpu_only]
74
75
fn cospi ( self ) -> Self {
75
76
unsafe { raw:: cospi ( self ) }
76
77
}
77
78
79
+ #[ gpu_only]
78
80
fn error_function ( self ) -> Self {
79
81
unsafe { raw:: erf ( self ) }
80
82
}
81
83
84
+ #[ gpu_only]
82
85
fn complementary_error_function ( self ) -> Self {
83
86
unsafe { raw:: erfc ( self ) }
84
87
}
85
88
89
+ #[ gpu_only]
86
90
fn inv_complementary_error_function ( self ) -> Self {
87
91
unsafe { raw:: erfcinv ( self ) }
88
92
}
89
93
94
+ #[ gpu_only]
90
95
fn scaled_complementary_error_function ( self ) -> Self {
91
96
unsafe { raw:: erfcx ( self ) }
92
97
}
93
98
99
+ #[ gpu_only]
94
100
fn frexp ( self ) -> ( Self , i32 ) {
95
101
let mut exp = 0 ;
96
102
unsafe {
@@ -99,55 +105,68 @@ impl FloatExt for f64 {
99
105
}
100
106
}
101
107
108
+ #[ gpu_only]
102
109
fn unbiased_exp ( self ) -> i32 {
103
110
unsafe { raw:: ilogb ( self ) }
104
111
}
105
112
113
+ #[ gpu_only]
106
114
fn j0 ( self ) -> Self {
107
115
unsafe { raw:: j0 ( self ) }
108
116
}
109
117
118
+ #[ gpu_only]
110
119
fn j1 ( self ) -> Self {
111
120
unsafe { raw:: j1 ( self ) }
112
121
}
113
122
123
+ #[ gpu_only]
114
124
fn jn ( self , order : i32 ) -> Self {
115
125
unsafe { raw:: jn ( order, self ) }
116
126
}
117
127
128
+ #[ gpu_only]
118
129
fn ldexp ( self , exp : i32 ) -> Self {
119
130
unsafe { raw:: ldexp ( self , exp) }
120
131
}
121
132
133
+ #[ gpu_only]
122
134
fn log_gamma ( self ) -> Self {
123
135
unsafe { raw:: lgamma ( self ) }
124
136
}
125
137
138
+ #[ gpu_only]
126
139
fn log1p ( self ) -> Self {
127
140
unsafe { raw:: log1p ( self ) }
128
141
}
129
142
143
+ #[ gpu_only]
130
144
fn norm_cdf ( self ) -> Self {
131
145
unsafe { raw:: normcdf ( self ) }
132
146
}
133
147
148
+ #[ gpu_only]
134
149
fn inv_norm_cdf ( self ) -> Self {
135
150
unsafe { raw:: normcdfinv ( self ) }
136
151
}
137
152
153
+ #[ gpu_only]
138
154
fn rcbrt ( self ) -> Self {
139
155
unsafe { raw:: rcbrt ( self ) }
140
156
}
141
157
158
+ #[ gpu_only]
142
159
fn saturate ( self ) -> Self {
143
160
// this intrinsic doesn't actually exist on f64, so implement it as clamp on f64
144
161
self . clamp ( 0.0 , 1.0 )
145
162
}
146
163
164
+ #[ gpu_only]
147
165
fn scale_by_n ( self , exp : i32 ) -> Self {
148
166
unsafe { raw:: scalbn ( self , exp) }
149
167
}
150
168
169
+ #[ gpu_only]
151
170
fn sincospi ( self ) -> ( Self , Self ) {
152
171
let mut sin = 0.0 ;
153
172
let mut cos = 0.0 ;
@@ -157,48 +176,59 @@ impl FloatExt for f64 {
157
176
( sin, cos)
158
177
}
159
178
179
+ #[ gpu_only]
160
180
fn sinpi ( self ) -> Self {
161
181
unsafe { raw:: sinpi ( self ) }
162
182
}
163
183
184
+ #[ gpu_only]
164
185
fn gamma ( self ) -> Self {
165
186
unsafe { raw:: tgamma ( self ) }
166
187
}
167
188
189
+ #[ gpu_only]
168
190
fn y0 ( self ) -> Self {
169
191
unsafe { raw:: y0 ( self ) }
170
192
}
171
193
194
+ #[ gpu_only]
172
195
fn y1 ( self ) -> Self {
173
196
unsafe { raw:: y1 ( self ) }
174
197
}
175
198
199
+ #[ gpu_only]
176
200
fn yn ( self , order : i32 ) -> Self {
177
201
unsafe { raw:: yn ( order, self ) }
178
202
}
179
203
}
180
204
181
205
impl FloatExt for f32 {
206
+ #[ gpu_only]
182
207
fn cospi ( self ) -> Self {
183
208
unsafe { raw:: cospif ( self ) }
184
209
}
185
210
211
+ #[ gpu_only]
186
212
fn error_function ( self ) -> Self {
187
213
unsafe { raw:: erff ( self ) }
188
214
}
189
215
216
+ #[ gpu_only]
190
217
fn complementary_error_function ( self ) -> Self {
191
218
unsafe { raw:: erfcf ( self ) }
192
219
}
193
220
221
+ #[ gpu_only]
194
222
fn inv_complementary_error_function ( self ) -> Self {
195
223
unsafe { raw:: erfcinvf ( self ) }
196
224
}
197
225
226
+ #[ gpu_only]
198
227
fn scaled_complementary_error_function ( self ) -> Self {
199
228
unsafe { raw:: erfcxf ( self ) }
200
229
}
201
230
231
+ #[ gpu_only]
202
232
fn frexp ( self ) -> ( Self , i32 ) {
203
233
let mut exp = 0 ;
204
234
unsafe {
@@ -207,54 +237,67 @@ impl FloatExt for f32 {
207
237
}
208
238
}
209
239
240
+ #[ gpu_only]
210
241
fn unbiased_exp ( self ) -> i32 {
211
242
unsafe { raw:: ilogbf ( self ) }
212
243
}
213
244
245
+ #[ gpu_only]
214
246
fn j0 ( self ) -> Self {
215
247
unsafe { raw:: j0f ( self ) }
216
248
}
217
249
250
+ #[ gpu_only]
218
251
fn j1 ( self ) -> Self {
219
252
unsafe { raw:: j1f ( self ) }
220
253
}
221
254
255
+ #[ gpu_only]
222
256
fn jn ( self , order : i32 ) -> Self {
223
257
unsafe { raw:: jnf ( order, self ) }
224
258
}
225
259
260
+ #[ gpu_only]
226
261
fn ldexp ( self , exp : i32 ) -> Self {
227
262
unsafe { raw:: ldexpf ( self , exp) }
228
263
}
229
264
265
+ #[ gpu_only]
230
266
fn log_gamma ( self ) -> Self {
231
267
unsafe { raw:: lgammaf ( self ) }
232
268
}
233
269
270
+ #[ gpu_only]
234
271
fn log1p ( self ) -> Self {
235
272
unsafe { raw:: log1pf ( self ) }
236
273
}
237
274
275
+ #[ gpu_only]
238
276
fn norm_cdf ( self ) -> Self {
239
277
unsafe { raw:: normcdff ( self ) }
240
278
}
241
279
280
+ #[ gpu_only]
242
281
fn inv_norm_cdf ( self ) -> Self {
243
282
unsafe { raw:: normcdfinvf ( self ) }
244
283
}
245
284
285
+ #[ gpu_only]
246
286
fn rcbrt ( self ) -> Self {
247
287
unsafe { raw:: rcbrtf ( self ) }
248
288
}
249
289
290
+ #[ gpu_only]
250
291
fn saturate ( self ) -> Self {
251
292
unsafe { raw:: saturatef ( self ) }
252
293
}
253
294
295
+ #[ gpu_only]
254
296
fn scale_by_n ( self , exp : i32 ) -> Self {
255
297
unsafe { raw:: scalbnf ( self , exp) }
256
298
}
257
299
300
+ #[ gpu_only]
258
301
fn sincospi ( self ) -> ( Self , Self ) {
259
302
let mut sin = 0.0 ;
260
303
let mut cos = 0.0 ;
@@ -264,22 +307,27 @@ impl FloatExt for f32 {
264
307
( sin, cos)
265
308
}
266
309
310
+ #[ gpu_only]
267
311
fn sinpi ( self ) -> Self {
268
312
unsafe { raw:: sinpif ( self ) }
269
313
}
270
314
315
+ #[ gpu_only]
271
316
fn gamma ( self ) -> Self {
272
317
unsafe { raw:: tgammaf ( self ) }
273
318
}
274
319
320
+ #[ gpu_only]
275
321
fn y0 ( self ) -> Self {
276
322
unsafe { raw:: y0f ( self ) }
277
323
}
278
324
325
+ #[ gpu_only]
279
326
fn y1 ( self ) -> Self {
280
327
unsafe { raw:: y1f ( self ) }
281
328
}
282
329
330
+ #[ gpu_only]
283
331
fn yn ( self , order : i32 ) -> Self {
284
332
unsafe { raw:: ynf ( order, self ) }
285
333
}
0 commit comments