Mark functions in float_ext.rs as gpu_only (#169)

Schmiedium · web-flow · commit 792056ccc6b8 · 2025-03-17T20:20:18.000-04:00
* Mark functions in float_ext.rs as gpu_only. Different behavior between the Linux and Windows linkers causes Windows CI to fail when compiling crates that need cuda_std. https://learn.microsoft.com/en-us/cpp/error-messages/tool-errors/linker-tools-error-lnk2019?view=msvc-170#you-call-intrinsic-functions-or-pass-argument-types-to-intrinsic-functions-that-arent-supported-on-your-target-architecture This link contains the exact reason for the error message. Compiling float_ext for an architecture other than nvptx caused the linker to fail with LNK2019, for the reason linked.
diff --git a/crates/cuda_std/src/float_ext.rs b/crates/cuda_std/src/float_ext.rs
@@ -1,6 +1,9 @@
 //! Extension trait for [`f32`] and [`f64`], providing high level wrappers on top of
 //! raw libdevice intrinsics from [`intrinsics`](crate::intrinsics).
 
+use cuda_std_macros::gpu_only;
+
+#[cfg(target_arch = "nvptx64")]
 use crate::intrinsics as raw;
 
 // allows us to add new functions to the trait at any time without needing a new major version.
@@ -71,26 +74,32 @@ pub trait FloatExt: Sized + private::Sealed {
 }
 
 impl FloatExt for f64 {
+    #[gpu_only]
     fn cospi(self) -> Self {
         unsafe { raw::cospi(self) }
     }
 
+    #[gpu_only]
     fn error_function(self) -> Self {
         unsafe { raw::erf(self) }
     }
 
+    #[gpu_only]
     fn complementary_error_function(self) -> Self {
         unsafe { raw::erfc(self) }
     }
 
+    #[gpu_only]
     fn inv_complementary_error_function(self) -> Self {
         unsafe { raw::erfcinv(self) }
     }
 
+    #[gpu_only]
     fn scaled_complementary_error_function(self) -> Self {
         unsafe { raw::erfcx(self) }
     }
 
+    #[gpu_only]
     fn frexp(self) -> (Self, i32) {
         let mut exp = 0;
         unsafe {
@@ -99,55 +108,68 @@ impl FloatExt for f64 {
         }
     }
 
+    #[gpu_only]
     fn unbiased_exp(self) -> i32 {
         unsafe { raw::ilogb(self) }
     }
 
+    #[gpu_only]
     fn j0(self) -> Self {
         unsafe { raw::j0(self) }
     }
 
+    #[gpu_only]
     fn j1(self) -> Self {
         unsafe { raw::j1(self) }
     }
 
+    #[gpu_only]
     fn jn(self, order: i32) -> Self {
         unsafe { raw::jn(order, self) }
     }
 
+    #[gpu_only]
     fn ldexp(self, exp: i32) -> Self {
         unsafe { raw::ldexp(self, exp) }
     }
 
+    #[gpu_only]
     fn log_gamma(self) -> Self {
         unsafe { raw::lgamma(self) }
     }
 
+    #[gpu_only]
     fn log1p(self) -> Self {
         unsafe { raw::log1p(self) }
     }
 
+    #[gpu_only]
     fn norm_cdf(self) -> Self {
         unsafe { raw::normcdf(self) }
     }
 
+    #[gpu_only]
     fn inv_norm_cdf(self) -> Self {
         unsafe { raw::normcdfinv(self) }
     }
 
+    #[gpu_only]
     fn rcbrt(self) -> Self {
         unsafe { raw::rcbrt(self) }
     }
 
+    #[gpu_only]
     fn saturate(self) -> Self {
         // this intrinsic doesnt actually exit on f64, so implement it as clamp on f64
         self.clamp(0.0, 1.0)
     }
 
+    #[gpu_only]
     fn scale_by_n(self, exp: i32) -> Self {
         unsafe { raw::scalbn(self, exp) }
     }
 
+    #[gpu_only]
     fn sincospi(self) -> (Self, Self) {
         let mut sin = 0.0;
         let mut cos = 0.0;
@@ -157,48 +179,59 @@ impl FloatExt for f64 {
         (sin, cos)
     }
 
+    #[gpu_only]
     fn sinpi(self) -> Self {
         unsafe { raw::sinpi(self) }
     }
 
+    #[gpu_only]
     fn gamma(self) -> Self {
         unsafe { raw::tgamma(self) }
     }
 
+    #[gpu_only]
     fn y0(self) -> Self {
         unsafe { raw::y0(self) }
     }
 
+    #[gpu_only]
     fn y1(self) -> Self {
         unsafe { raw::y1(self) }
     }
 
+    #[gpu_only]
     fn yn(self, order: i32) -> Self {
         unsafe { raw::yn(order, self) }
     }
 }
 
 impl FloatExt for f32 {
+    #[gpu_only]
     fn cospi(self) -> Self {
         unsafe { raw::cospif(self) }
     }
 
+    #[gpu_only]
     fn error_function(self) -> Self {
         unsafe { raw::erff(self) }
     }
 
+    #[gpu_only]
     fn complementary_error_function(self) -> Self {
         unsafe { raw::erfcf(self) }
     }
 
+    #[gpu_only]
     fn inv_complementary_error_function(self) -> Self {
         unsafe { raw::erfcinvf(self) }
     }
 
+    #[gpu_only]
     fn scaled_complementary_error_function(self) -> Self {
         unsafe { raw::erfcxf(self) }
     }
 
+    #[gpu_only]
     fn frexp(self) -> (Self, i32) {
         let mut exp = 0;
         unsafe {
@@ -207,54 +240,67 @@ impl FloatExt for f32 {
         }
     }
 
+    #[gpu_only]
     fn unbiased_exp(self) -> i32 {
         unsafe { raw::ilogbf(self) }
     }
 
+    #[gpu_only]
     fn j0(self) -> Self {
         unsafe { raw::j0f(self) }
     }
 
+    #[gpu_only]
     fn j1(self) -> Self {
         unsafe { raw::j1f(self) }
     }
 
+    #[gpu_only]
     fn jn(self, order: i32) -> Self {
         unsafe { raw::jnf(order, self) }
     }
 
+    #[gpu_only]
     fn ldexp(self, exp: i32) -> Self {
         unsafe { raw::ldexpf(self, exp) }
     }
 
+    #[gpu_only]
     fn log_gamma(self) -> Self {
         unsafe { raw::lgammaf(self) }
     }
 
+    #[gpu_only]
     fn log1p(self) -> Self {
         unsafe { raw::log1pf(self) }
     }
 
+    #[gpu_only]
     fn norm_cdf(self) -> Self {
         unsafe { raw::normcdff(self) }
     }
 
+    #[gpu_only]
     fn inv_norm_cdf(self) -> Self {
         unsafe { raw::normcdfinvf(self) }
     }
 
+    #[gpu_only]
     fn rcbrt(self) -> Self {
         unsafe { raw::rcbrtf(self) }
     }
 
+    #[gpu_only]
     fn saturate(self) -> Self {
         unsafe { raw::saturatef(self) }
     }
 
+    #[gpu_only]
     fn scale_by_n(self, exp: i32) -> Self {
         unsafe { raw::scalbnf(self, exp) }
     }
 
+    #[gpu_only]
     fn sincospi(self) -> (Self, Self) {
         let mut sin = 0.0;
         let mut cos = 0.0;
@@ -264,22 +310,27 @@ impl FloatExt for f32 {
         (sin, cos)
     }
 
+    #[gpu_only]
     fn sinpi(self) -> Self {
         unsafe { raw::sinpif(self) }
     }
 
+    #[gpu_only]
     fn gamma(self) -> Self {
         unsafe { raw::tgammaf(self) }
     }
 
+    #[gpu_only]
     fn y0(self) -> Self {
         unsafe { raw::y0f(self) }
     }
 
+    #[gpu_only]
     fn y1(self) -> Self {
         unsafe { raw::y1f(self) }
     }
 
+    #[gpu_only]
     fn yn(self, order: i32) -> Self {
         unsafe { raw::ynf(order, self) }
     }

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,9 @@`
`1`	`1`	//! Extension trait for [`f32`] and [`f64`], providing high level wrappers on top of
`2`	`2`	//! raw libdevice intrinsics from [`intrinsics`](crate::intrinsics).
`3`	`3`
	`4`	`+use cuda_std_macros::gpu_only;`
	`5`	`+`
	`6`	`+#[cfg(target_arch = "nvptx64")]`
`4`	`7`	`use crate::intrinsics as raw;`
`5`	`8`
`6`	`9`	`// allows us to add new functions to the trait at any time without needing a new major version.`
`@@ -71,26 +74,32 @@ pub trait FloatExt: Sized + private::Sealed {`
`71`	`74`	`}`
`72`	`75`
`73`	`76`	`impl FloatExt for f64 {`
	`77`	`+ #[gpu_only]`
`74`	`78`	`fn cospi(self) -> Self {`
`75`	`79`	`unsafe { raw::cospi(self) }`
`76`	`80`	`}`
`77`	`81`
	`82`	`+ #[gpu_only]`
`78`	`83`	`fn error_function(self) -> Self {`
`79`	`84`	`unsafe { raw::erf(self) }`
`80`	`85`	`}`
`81`	`86`
	`87`	`+ #[gpu_only]`
`82`	`88`	`fn complementary_error_function(self) -> Self {`
`83`	`89`	`unsafe { raw::erfc(self) }`
`84`	`90`	`}`
`85`	`91`
	`92`	`+ #[gpu_only]`
`86`	`93`	`fn inv_complementary_error_function(self) -> Self {`
`87`	`94`	`unsafe { raw::erfcinv(self) }`
`88`	`95`	`}`
`89`	`96`
	`97`	`+ #[gpu_only]`
`90`	`98`	`fn scaled_complementary_error_function(self) -> Self {`
`91`	`99`	`unsafe { raw::erfcx(self) }`
`92`	`100`	`}`
`93`	`101`
	`102`	`+ #[gpu_only]`
`94`	`103`	`fn frexp(self) -> (Self, i32) {`
`95`	`104`	`let mut exp = 0;`
`96`	`105`	`unsafe {`
`@@ -99,55 +108,68 @@ impl FloatExt for f64 {`
`99`	`108`	`}`
`100`	`109`	`}`
`101`	`110`
	`111`	`+ #[gpu_only]`
`102`	`112`	`fn unbiased_exp(self) -> i32 {`
`103`	`113`	`unsafe { raw::ilogb(self) }`
`104`	`114`	`}`
`105`	`115`
	`116`	`+ #[gpu_only]`
`106`	`117`	`fn j0(self) -> Self {`
`107`	`118`	`unsafe { raw::j0(self) }`
`108`	`119`	`}`
`109`	`120`
	`121`	`+ #[gpu_only]`
`110`	`122`	`fn j1(self) -> Self {`
`111`	`123`	`unsafe { raw::j1(self) }`
`112`	`124`	`}`
`113`	`125`
	`126`	`+ #[gpu_only]`
`114`	`127`	`fn jn(self, order: i32) -> Self {`
`115`	`128`	`unsafe { raw::jn(order, self) }`
`116`	`129`	`}`
`117`	`130`
	`131`	`+ #[gpu_only]`
`118`	`132`	`fn ldexp(self, exp: i32) -> Self {`
`119`	`133`	`unsafe { raw::ldexp(self, exp) }`
`120`	`134`	`}`
`121`	`135`
	`136`	`+ #[gpu_only]`
`122`	`137`	`fn log_gamma(self) -> Self {`
`123`	`138`	`unsafe { raw::lgamma(self) }`
`124`	`139`	`}`
`125`	`140`
	`141`	`+ #[gpu_only]`
`126`	`142`	`fn log1p(self) -> Self {`
`127`	`143`	`unsafe { raw::log1p(self) }`
`128`	`144`	`}`
`129`	`145`
	`146`	`+ #[gpu_only]`
`130`	`147`	`fn norm_cdf(self) -> Self {`
`131`	`148`	`unsafe { raw::normcdf(self) }`
`132`	`149`	`}`
`133`	`150`
	`151`	`+ #[gpu_only]`
`134`	`152`	`fn inv_norm_cdf(self) -> Self {`
`135`	`153`	`unsafe { raw::normcdfinv(self) }`
`136`	`154`	`}`
`137`	`155`
	`156`	`+ #[gpu_only]`
`138`	`157`	`fn rcbrt(self) -> Self {`
`139`	`158`	`unsafe { raw::rcbrt(self) }`
`140`	`159`	`}`
`141`	`160`
	`161`	`+ #[gpu_only]`
`142`	`162`	`fn saturate(self) -> Self {`
`143`	`163`	`// this intrinsic doesnt actually exit on f64, so implement it as clamp on f64`
`144`	`164`	`self.clamp(0.0, 1.0)`
`145`	`165`	`}`
`146`	`166`
	`167`	`+ #[gpu_only]`
`147`	`168`	`fn scale_by_n(self, exp: i32) -> Self {`
`148`	`169`	`unsafe { raw::scalbn(self, exp) }`
`149`	`170`	`}`
`150`	`171`
	`172`	`+ #[gpu_only]`
`151`	`173`	`fn sincospi(self) -> (Self, Self) {`
`152`	`174`	`let mut sin = 0.0;`
`153`	`175`	`let mut cos = 0.0;`
`@@ -157,48 +179,59 @@ impl FloatExt for f64 {`
`157`	`179`	`(sin, cos)`
`158`	`180`	`}`
`159`	`181`
	`182`	`+ #[gpu_only]`
`160`	`183`	`fn sinpi(self) -> Self {`
`161`	`184`	`unsafe { raw::sinpi(self) }`
`162`	`185`	`}`
`163`	`186`
	`187`	`+ #[gpu_only]`
`164`	`188`	`fn gamma(self) -> Self {`
`165`	`189`	`unsafe { raw::tgamma(self) }`
`166`	`190`	`}`
`167`	`191`
	`192`	`+ #[gpu_only]`
`168`	`193`	`fn y0(self) -> Self {`
`169`	`194`	`unsafe { raw::y0(self) }`
`170`	`195`	`}`
`171`	`196`
	`197`	`+ #[gpu_only]`
`172`	`198`	`fn y1(self) -> Self {`
`173`	`199`	`unsafe { raw::y1(self) }`
`174`	`200`	`}`
`175`	`201`
	`202`	`+ #[gpu_only]`
`176`	`203`	`fn yn(self, order: i32) -> Self {`
`177`	`204`	`unsafe { raw::yn(order, self) }`
`178`	`205`	`}`
`179`	`206`	`}`
`180`	`207`
`181`	`208`	`impl FloatExt for f32 {`
	`209`	`+ #[gpu_only]`
`182`	`210`	`fn cospi(self) -> Self {`
`183`	`211`	`unsafe { raw::cospif(self) }`
`184`	`212`	`}`
`185`	`213`
	`214`	`+ #[gpu_only]`
`186`	`215`	`fn error_function(self) -> Self {`
`187`	`216`	`unsafe { raw::erff(self) }`
`188`	`217`	`}`
`189`	`218`
	`219`	`+ #[gpu_only]`
`190`	`220`	`fn complementary_error_function(self) -> Self {`
`191`	`221`	`unsafe { raw::erfcf(self) }`
`192`	`222`	`}`
`193`	`223`
	`224`	`+ #[gpu_only]`
`194`	`225`	`fn inv_complementary_error_function(self) -> Self {`
`195`	`226`	`unsafe { raw::erfcinvf(self) }`
`196`	`227`	`}`
`197`	`228`
	`229`	`+ #[gpu_only]`
`198`	`230`	`fn scaled_complementary_error_function(self) -> Self {`
`199`	`231`	`unsafe { raw::erfcxf(self) }`
`200`	`232`	`}`
`201`	`233`
	`234`	`+ #[gpu_only]`
`202`	`235`	`fn frexp(self) -> (Self, i32) {`
`203`	`236`	`let mut exp = 0;`
`204`	`237`	`unsafe {`
`@@ -207,54 +240,67 @@ impl FloatExt for f32 {`
`207`	`240`	`}`
`208`	`241`	`}`
`209`	`242`
	`243`	`+ #[gpu_only]`
`210`	`244`	`fn unbiased_exp(self) -> i32 {`
`211`	`245`	`unsafe { raw::ilogbf(self) }`
`212`	`246`	`}`
`213`	`247`
	`248`	`+ #[gpu_only]`
`214`	`249`	`fn j0(self) -> Self {`
`215`	`250`	`unsafe { raw::j0f(self) }`
`216`	`251`	`}`
`217`	`252`
	`253`	`+ #[gpu_only]`
`218`	`254`	`fn j1(self) -> Self {`
`219`	`255`	`unsafe { raw::j1f(self) }`
`220`	`256`	`}`
`221`	`257`
	`258`	`+ #[gpu_only]`
`222`	`259`	`fn jn(self, order: i32) -> Self {`
`223`	`260`	`unsafe { raw::jnf(order, self) }`
`224`	`261`	`}`
`225`	`262`
	`263`	`+ #[gpu_only]`
`226`	`264`	`fn ldexp(self, exp: i32) -> Self {`
`227`	`265`	`unsafe { raw::ldexpf(self, exp) }`
`228`	`266`	`}`
`229`	`267`
	`268`	`+ #[gpu_only]`
`230`	`269`	`fn log_gamma(self) -> Self {`
`231`	`270`	`unsafe { raw::lgammaf(self) }`
`232`	`271`	`}`
`233`	`272`
	`273`	`+ #[gpu_only]`
`234`	`274`	`fn log1p(self) -> Self {`
`235`	`275`	`unsafe { raw::log1pf(self) }`
`236`	`276`	`}`
`237`	`277`
	`278`	`+ #[gpu_only]`
`238`	`279`	`fn norm_cdf(self) -> Self {`
`239`	`280`	`unsafe { raw::normcdff(self) }`
`240`	`281`	`}`
`241`	`282`
	`283`	`+ #[gpu_only]`
`242`	`284`	`fn inv_norm_cdf(self) -> Self {`
`243`	`285`	`unsafe { raw::normcdfinvf(self) }`
`244`	`286`	`}`
`245`	`287`
	`288`	`+ #[gpu_only]`
`246`	`289`	`fn rcbrt(self) -> Self {`
`247`	`290`	`unsafe { raw::rcbrtf(self) }`
`248`	`291`	`}`
`249`	`292`
	`293`	`+ #[gpu_only]`
`250`	`294`	`fn saturate(self) -> Self {`
`251`	`295`	`unsafe { raw::saturatef(self) }`
`252`	`296`	`}`
`253`	`297`
	`298`	`+ #[gpu_only]`
`254`	`299`	`fn scale_by_n(self, exp: i32) -> Self {`
`255`	`300`	`unsafe { raw::scalbnf(self, exp) }`
`256`	`301`	`}`
`257`	`302`
	`303`	`+ #[gpu_only]`
`258`	`304`	`fn sincospi(self) -> (Self, Self) {`
`259`	`305`	`let mut sin = 0.0;`
`260`	`306`	`let mut cos = 0.0;`
`@@ -264,22 +310,27 @@ impl FloatExt for f32 {`
`264`	`310`	`(sin, cos)`
`265`	`311`	`}`
`266`	`312`
	`313`	`+ #[gpu_only]`
`267`	`314`	`fn sinpi(self) -> Self {`
`268`	`315`	`unsafe { raw::sinpif(self) }`
`269`	`316`	`}`
`270`	`317`
	`318`	`+ #[gpu_only]`
`271`	`319`	`fn gamma(self) -> Self {`
`272`	`320`	`unsafe { raw::tgammaf(self) }`
`273`	`321`	`}`
`274`	`322`
	`323`	`+ #[gpu_only]`
`275`	`324`	`fn y0(self) -> Self {`
`276`	`325`	`unsafe { raw::y0f(self) }`
`277`	`326`	`}`
`278`	`327`
	`328`	`+ #[gpu_only]`
`279`	`329`	`fn y1(self) -> Self {`
`280`	`330`	`unsafe { raw::y1f(self) }`
`281`	`331`	`}`
`282`	`332`
	`333`	`+ #[gpu_only]`
`283`	`334`	`fn yn(self, order: i32) -> Self {`
`284`	`335`	`unsafe { raw::ynf(order, self) }`
`285`	`336`	`}`