Skip to content

Commit 7e2d1d1

Browse files
committed
Mark functions in float_ext.rs as gpu_only
Different behavior between the Linux and Windows linkers causes Windows CI to fail when compiling crates that need cuda_std. Compiling float_ext for an architecture other than nvptx caused the Windows (MSVC) linker to fail with LNK2019. The exact reason for the error is described here: https://learn.microsoft.com/en-us/cpp/error-messages/tool-errors/linker-tools-error-lnk2019?view=msvc-170#you-call-intrinsic-functions-or-pass-argument-types-to-intrinsic-functions-that-arent-supported-on-your-target-architecture
1 parent 42afe4b commit 7e2d1d1

File tree

1 file changed

+49
-1
lines changed

1 file changed

+49
-1
lines changed

crates/cuda_std/src/float_ext.rs

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Extension trait for [`f32`] and [`f64`], providing high level wrappers on top of
22
//! raw libdevice intrinsics from [`intrinsics`](crate::intrinsics).
33
4-
use crate::intrinsics as raw;
4+
use cuda_std_macros::gpu_only;
55

66
// allows us to add new functions to the trait at any time without needing a new major version.
77
mod private {
@@ -71,26 +71,32 @@ pub trait FloatExt: Sized + private::Sealed {
7171
}
7272

7373
impl FloatExt for f64 {
74+
#[gpu_only]
7475
fn cospi(self) -> Self {
7576
unsafe { raw::cospi(self) }
7677
}
7778

79+
#[gpu_only]
7880
fn error_function(self) -> Self {
7981
unsafe { raw::erf(self) }
8082
}
8183

84+
#[gpu_only]
8285
fn complementary_error_function(self) -> Self {
8386
unsafe { raw::erfc(self) }
8487
}
8588

89+
#[gpu_only]
8690
fn inv_complementary_error_function(self) -> Self {
8791
unsafe { raw::erfcinv(self) }
8892
}
8993

94+
#[gpu_only]
9095
fn scaled_complementary_error_function(self) -> Self {
9196
unsafe { raw::erfcx(self) }
9297
}
9398

99+
#[gpu_only]
94100
fn frexp(self) -> (Self, i32) {
95101
let mut exp = 0;
96102
unsafe {
@@ -99,55 +105,68 @@ impl FloatExt for f64 {
99105
}
100106
}
101107

108+
#[gpu_only]
102109
fn unbiased_exp(self) -> i32 {
103110
unsafe { raw::ilogb(self) }
104111
}
105112

113+
#[gpu_only]
106114
fn j0(self) -> Self {
107115
unsafe { raw::j0(self) }
108116
}
109117

118+
#[gpu_only]
110119
fn j1(self) -> Self {
111120
unsafe { raw::j1(self) }
112121
}
113122

123+
#[gpu_only]
114124
fn jn(self, order: i32) -> Self {
115125
unsafe { raw::jn(order, self) }
116126
}
117127

128+
#[gpu_only]
118129
fn ldexp(self, exp: i32) -> Self {
119130
unsafe { raw::ldexp(self, exp) }
120131
}
121132

133+
#[gpu_only]
122134
fn log_gamma(self) -> Self {
123135
unsafe { raw::lgamma(self) }
124136
}
125137

138+
#[gpu_only]
126139
fn log1p(self) -> Self {
127140
unsafe { raw::log1p(self) }
128141
}
129142

143+
#[gpu_only]
130144
fn norm_cdf(self) -> Self {
131145
unsafe { raw::normcdf(self) }
132146
}
133147

148+
#[gpu_only]
134149
fn inv_norm_cdf(self) -> Self {
135150
unsafe { raw::normcdfinv(self) }
136151
}
137152

153+
#[gpu_only]
138154
fn rcbrt(self) -> Self {
139155
unsafe { raw::rcbrt(self) }
140156
}
141157

158+
#[gpu_only]
142159
fn saturate(self) -> Self {
143160
// this intrinsic doesn't actually exist for f64, so implement it as a clamp to [0.0, 1.0] instead
144161
self.clamp(0.0, 1.0)
145162
}
146163

164+
#[gpu_only]
147165
fn scale_by_n(self, exp: i32) -> Self {
148166
unsafe { raw::scalbn(self, exp) }
149167
}
150168

169+
#[gpu_only]
151170
fn sincospi(self) -> (Self, Self) {
152171
let mut sin = 0.0;
153172
let mut cos = 0.0;
@@ -157,48 +176,59 @@ impl FloatExt for f64 {
157176
(sin, cos)
158177
}
159178

179+
#[gpu_only]
160180
fn sinpi(self) -> Self {
161181
unsafe { raw::sinpi(self) }
162182
}
163183

184+
#[gpu_only]
164185
fn gamma(self) -> Self {
165186
unsafe { raw::tgamma(self) }
166187
}
167188

189+
#[gpu_only]
168190
fn y0(self) -> Self {
169191
unsafe { raw::y0(self) }
170192
}
171193

194+
#[gpu_only]
172195
fn y1(self) -> Self {
173196
unsafe { raw::y1(self) }
174197
}
175198

199+
#[gpu_only]
176200
fn yn(self, order: i32) -> Self {
177201
unsafe { raw::yn(order, self) }
178202
}
179203
}
180204

181205
impl FloatExt for f32 {
206+
#[gpu_only]
182207
fn cospi(self) -> Self {
183208
unsafe { raw::cospif(self) }
184209
}
185210

211+
#[gpu_only]
186212
fn error_function(self) -> Self {
187213
unsafe { raw::erff(self) }
188214
}
189215

216+
#[gpu_only]
190217
fn complementary_error_function(self) -> Self {
191218
unsafe { raw::erfcf(self) }
192219
}
193220

221+
#[gpu_only]
194222
fn inv_complementary_error_function(self) -> Self {
195223
unsafe { raw::erfcinvf(self) }
196224
}
197225

226+
#[gpu_only]
198227
fn scaled_complementary_error_function(self) -> Self {
199228
unsafe { raw::erfcxf(self) }
200229
}
201230

231+
#[gpu_only]
202232
fn frexp(self) -> (Self, i32) {
203233
let mut exp = 0;
204234
unsafe {
@@ -207,54 +237,67 @@ impl FloatExt for f32 {
207237
}
208238
}
209239

240+
#[gpu_only]
210241
fn unbiased_exp(self) -> i32 {
211242
unsafe { raw::ilogbf(self) }
212243
}
213244

245+
#[gpu_only]
214246
fn j0(self) -> Self {
215247
unsafe { raw::j0f(self) }
216248
}
217249

250+
#[gpu_only]
218251
fn j1(self) -> Self {
219252
unsafe { raw::j1f(self) }
220253
}
221254

255+
#[gpu_only]
222256
fn jn(self, order: i32) -> Self {
223257
unsafe { raw::jnf(order, self) }
224258
}
225259

260+
#[gpu_only]
226261
fn ldexp(self, exp: i32) -> Self {
227262
unsafe { raw::ldexpf(self, exp) }
228263
}
229264

265+
#[gpu_only]
230266
fn log_gamma(self) -> Self {
231267
unsafe { raw::lgammaf(self) }
232268
}
233269

270+
#[gpu_only]
234271
fn log1p(self) -> Self {
235272
unsafe { raw::log1pf(self) }
236273
}
237274

275+
#[gpu_only]
238276
fn norm_cdf(self) -> Self {
239277
unsafe { raw::normcdff(self) }
240278
}
241279

280+
#[gpu_only]
242281
fn inv_norm_cdf(self) -> Self {
243282
unsafe { raw::normcdfinvf(self) }
244283
}
245284

285+
#[gpu_only]
246286
fn rcbrt(self) -> Self {
247287
unsafe { raw::rcbrtf(self) }
248288
}
249289

290+
#[gpu_only]
250291
fn saturate(self) -> Self {
251292
unsafe { raw::saturatef(self) }
252293
}
253294

295+
#[gpu_only]
254296
fn scale_by_n(self, exp: i32) -> Self {
255297
unsafe { raw::scalbnf(self, exp) }
256298
}
257299

300+
#[gpu_only]
258301
fn sincospi(self) -> (Self, Self) {
259302
let mut sin = 0.0;
260303
let mut cos = 0.0;
@@ -264,22 +307,27 @@ impl FloatExt for f32 {
264307
(sin, cos)
265308
}
266309

310+
#[gpu_only]
267311
fn sinpi(self) -> Self {
268312
unsafe { raw::sinpif(self) }
269313
}
270314

315+
#[gpu_only]
271316
fn gamma(self) -> Self {
272317
unsafe { raw::tgammaf(self) }
273318
}
274319

320+
#[gpu_only]
275321
fn y0(self) -> Self {
276322
unsafe { raw::y0f(self) }
277323
}
278324

325+
#[gpu_only]
279326
fn y1(self) -> Self {
280327
unsafe { raw::y1f(self) }
281328
}
282329

330+
#[gpu_only]
283331
fn yn(self, order: i32) -> Self {
284332
unsafe { raw::ynf(order, self) }
285333
}

0 commit comments

Comments
 (0)