Mark functions in float_ext.rs as gpu_only

Schmiedium · Schmiedium · commit 7e2d1d19e4a1 · 2025-03-17T18:50:22.000-04:00
Different behavior between the Linux and Windows linkers causes Windows CI to fail when compiling crates that depend on cuda_std. The exact reason for the error message is documented at https://learn.microsoft.com/en-us/cpp/error-messages/tool-errors/linker-tools-error-lnk2019?view=msvc-170#you-call-intrinsic-functions-or-pass-argument-types-to-intrinsic-functions-that-arent-supported-on-your-target-architecture — compiling float_ext for an architecture other than nvptx caused the linker to fail with LNK2019, for the reason described there.
diff --git a/crates/cuda_std/src/float_ext.rs b/crates/cuda_std/src/float_ext.rs
@@ -1,7 +1,7 @@
 //! Extension trait for [`f32`] and [`f64`], providing high level wrappers on top of
 //! raw libdevice intrinsics from [`intrinsics`](crate::intrinsics).
 
-use crate::intrinsics as raw;
+use cuda_std_macros::gpu_only;
 
 // allows us to add new functions to the trait at any time without needing a new major version.
 mod private {
@@ -71,26 +71,32 @@ pub trait FloatExt: Sized + private::Sealed {
 }
 
 impl FloatExt for f64 {
+    #[gpu_only]
     fn cospi(self) -> Self {
         unsafe { raw::cospi(self) }
     }
 
+    #[gpu_only]
     fn error_function(self) -> Self {
         unsafe { raw::erf(self) }
     }
 
+    #[gpu_only]
     fn complementary_error_function(self) -> Self {
         unsafe { raw::erfc(self) }
     }
 
+    #[gpu_only]
     fn inv_complementary_error_function(self) -> Self {
         unsafe { raw::erfcinv(self) }
     }
 
+    #[gpu_only]
     fn scaled_complementary_error_function(self) -> Self {
         unsafe { raw::erfcx(self) }
     }
 
+    #[gpu_only]
     fn frexp(self) -> (Self, i32) {
         let mut exp = 0;
         unsafe {
@@ -99,55 +105,68 @@ impl FloatExt for f64 {
         }
     }
 
+    #[gpu_only]
     fn unbiased_exp(self) -> i32 {
         unsafe { raw::ilogb(self) }
     }
 
+    #[gpu_only]
     fn j0(self) -> Self {
         unsafe { raw::j0(self) }
     }
 
+    #[gpu_only]
     fn j1(self) -> Self {
         unsafe { raw::j1(self) }
     }
 
+    #[gpu_only]
     fn jn(self, order: i32) -> Self {
         unsafe { raw::jn(order, self) }
     }
 
+    #[gpu_only]
     fn ldexp(self, exp: i32) -> Self {
         unsafe { raw::ldexp(self, exp) }
     }
 
+    #[gpu_only]
     fn log_gamma(self) -> Self {
         unsafe { raw::lgamma(self) }
     }
 
+    #[gpu_only]
     fn log1p(self) -> Self {
         unsafe { raw::log1p(self) }
     }
 
+    #[gpu_only]
     fn norm_cdf(self) -> Self {
         unsafe { raw::normcdf(self) }
     }
 
+    #[gpu_only]
     fn inv_norm_cdf(self) -> Self {
         unsafe { raw::normcdfinv(self) }
     }
 
+    #[gpu_only]
     fn rcbrt(self) -> Self {
         unsafe { raw::rcbrt(self) }
     }
 
+    #[gpu_only]
     fn saturate(self) -> Self {
         // this intrinsic doesnt actually exit on f64, so implement it as clamp on f64
         self.clamp(0.0, 1.0)
     }
 
+    #[gpu_only]
     fn scale_by_n(self, exp: i32) -> Self {
         unsafe { raw::scalbn(self, exp) }
     }
 
+    #[gpu_only]
     fn sincospi(self) -> (Self, Self) {
         let mut sin = 0.0;
         let mut cos = 0.0;
@@ -157,48 +176,59 @@ impl FloatExt for f64 {
         (sin, cos)
     }
 
+    #[gpu_only]
     fn sinpi(self) -> Self {
         unsafe { raw::sinpi(self) }
     }
 
+    #[gpu_only]
     fn gamma(self) -> Self {
         unsafe { raw::tgamma(self) }
     }
 
+    #[gpu_only]
     fn y0(self) -> Self {
         unsafe { raw::y0(self) }
     }
 
+    #[gpu_only]
     fn y1(self) -> Self {
         unsafe { raw::y1(self) }
     }
 
+    #[gpu_only]
     fn yn(self, order: i32) -> Self {
         unsafe { raw::yn(order, self) }
     }
 }
 
 impl FloatExt for f32 {
+    #[gpu_only]
     fn cospi(self) -> Self {
         unsafe { raw::cospif(self) }
     }
 
+    #[gpu_only]
     fn error_function(self) -> Self {
         unsafe { raw::erff(self) }
     }
 
+    #[gpu_only]
     fn complementary_error_function(self) -> Self {
         unsafe { raw::erfcf(self) }
     }
 
+    #[gpu_only]
     fn inv_complementary_error_function(self) -> Self {
         unsafe { raw::erfcinvf(self) }
     }
 
+    #[gpu_only]
     fn scaled_complementary_error_function(self) -> Self {
         unsafe { raw::erfcxf(self) }
     }
 
+    #[gpu_only]
     fn frexp(self) -> (Self, i32) {
         let mut exp = 0;
         unsafe {
@@ -207,54 +237,67 @@ impl FloatExt for f32 {
         }
     }
 
+    #[gpu_only]
     fn unbiased_exp(self) -> i32 {
         unsafe { raw::ilogbf(self) }
     }
 
+    #[gpu_only]
     fn j0(self) -> Self {
         unsafe { raw::j0f(self) }
     }
 
+    #[gpu_only]
     fn j1(self) -> Self {
         unsafe { raw::j1f(self) }
     }
 
+    #[gpu_only]
     fn jn(self, order: i32) -> Self {
         unsafe { raw::jnf(order, self) }
     }
 
+    #[gpu_only]
     fn ldexp(self, exp: i32) -> Self {
         unsafe { raw::ldexpf(self, exp) }
     }
 
+    #[gpu_only]
     fn log_gamma(self) -> Self {
         unsafe { raw::lgammaf(self) }
     }
 
+    #[gpu_only]
     fn log1p(self) -> Self {
         unsafe { raw::log1pf(self) }
     }
 
+    #[gpu_only]
     fn norm_cdf(self) -> Self {
         unsafe { raw::normcdff(self) }
     }
 
+    #[gpu_only]
     fn inv_norm_cdf(self) -> Self {
         unsafe { raw::normcdfinvf(self) }
     }
 
+    #[gpu_only]
     fn rcbrt(self) -> Self {
         unsafe { raw::rcbrtf(self) }
     }
 
+    #[gpu_only]
     fn saturate(self) -> Self {
         unsafe { raw::saturatef(self) }
     }
 
+    #[gpu_only]
     fn scale_by_n(self, exp: i32) -> Self {
         unsafe { raw::scalbnf(self, exp) }
     }
 
+    #[gpu_only]
     fn sincospi(self) -> (Self, Self) {
         let mut sin = 0.0;
         let mut cos = 0.0;
@@ -264,22 +307,27 @@ impl FloatExt for f32 {
         (sin, cos)
     }
 
+    #[gpu_only]
     fn sinpi(self) -> Self {
         unsafe { raw::sinpif(self) }
     }
 
+    #[gpu_only]
     fn gamma(self) -> Self {
         unsafe { raw::tgammaf(self) }
     }
 
+    #[gpu_only]
     fn y0(self) -> Self {
         unsafe { raw::y0f(self) }
     }
 
+    #[gpu_only]
     fn y1(self) -> Self {
         unsafe { raw::y1f(self) }
     }
 
+    #[gpu_only]
     fn yn(self, order: i32) -> Self {
         unsafe { raw::ynf(order, self) }
     }

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	//! Extension trait for [`f32`] and [`f64`], providing high level wrappers on top of
`2`	`2`	//! raw libdevice intrinsics from [`intrinsics`](crate::intrinsics).
`3`	`3`
`4`		`-use crate::intrinsics as raw;`
	`4`	`+use cuda_std_macros::gpu_only;`
`5`	`5`
`6`	`6`	`// allows us to add new functions to the trait at any time without needing a new major version.`
`7`	`7`	`mod private {`
`@@ -71,26 +71,32 @@ pub trait FloatExt: Sized + private::Sealed {`
`71`	`71`	`}`
`72`	`72`
`73`	`73`	`impl FloatExt for f64 {`
	`74`	`+ #[gpu_only]`
`74`	`75`	`fn cospi(self) -> Self {`
`75`	`76`	`unsafe { raw::cospi(self) }`
`76`	`77`	`}`
`77`	`78`
	`79`	`+ #[gpu_only]`
`78`	`80`	`fn error_function(self) -> Self {`
`79`	`81`	`unsafe { raw::erf(self) }`
`80`	`82`	`}`
`81`	`83`
	`84`	`+ #[gpu_only]`
`82`	`85`	`fn complementary_error_function(self) -> Self {`
`83`	`86`	`unsafe { raw::erfc(self) }`
`84`	`87`	`}`
`85`	`88`
	`89`	`+ #[gpu_only]`
`86`	`90`	`fn inv_complementary_error_function(self) -> Self {`
`87`	`91`	`unsafe { raw::erfcinv(self) }`
`88`	`92`	`}`
`89`	`93`
	`94`	`+ #[gpu_only]`
`90`	`95`	`fn scaled_complementary_error_function(self) -> Self {`
`91`	`96`	`unsafe { raw::erfcx(self) }`
`92`	`97`	`}`
`93`	`98`
	`99`	`+ #[gpu_only]`
`94`	`100`	`fn frexp(self) -> (Self, i32) {`
`95`	`101`	`let mut exp = 0;`
`96`	`102`	`unsafe {`
`@@ -99,55 +105,68 @@ impl FloatExt for f64 {`
`99`	`105`	`}`
`100`	`106`	`}`
`101`	`107`
	`108`	`+ #[gpu_only]`
`102`	`109`	`fn unbiased_exp(self) -> i32 {`
`103`	`110`	`unsafe { raw::ilogb(self) }`
`104`	`111`	`}`
`105`	`112`
	`113`	`+ #[gpu_only]`
`106`	`114`	`fn j0(self) -> Self {`
`107`	`115`	`unsafe { raw::j0(self) }`
`108`	`116`	`}`
`109`	`117`
	`118`	`+ #[gpu_only]`
`110`	`119`	`fn j1(self) -> Self {`
`111`	`120`	`unsafe { raw::j1(self) }`
`112`	`121`	`}`
`113`	`122`
	`123`	`+ #[gpu_only]`
`114`	`124`	`fn jn(self, order: i32) -> Self {`
`115`	`125`	`unsafe { raw::jn(order, self) }`
`116`	`126`	`}`
`117`	`127`
	`128`	`+ #[gpu_only]`
`118`	`129`	`fn ldexp(self, exp: i32) -> Self {`
`119`	`130`	`unsafe { raw::ldexp(self, exp) }`
`120`	`131`	`}`
`121`	`132`
	`133`	`+ #[gpu_only]`
`122`	`134`	`fn log_gamma(self) -> Self {`
`123`	`135`	`unsafe { raw::lgamma(self) }`
`124`	`136`	`}`
`125`	`137`
	`138`	`+ #[gpu_only]`
`126`	`139`	`fn log1p(self) -> Self {`
`127`	`140`	`unsafe { raw::log1p(self) }`
`128`	`141`	`}`
`129`	`142`
	`143`	`+ #[gpu_only]`
`130`	`144`	`fn norm_cdf(self) -> Self {`
`131`	`145`	`unsafe { raw::normcdf(self) }`
`132`	`146`	`}`
`133`	`147`
	`148`	`+ #[gpu_only]`
`134`	`149`	`fn inv_norm_cdf(self) -> Self {`
`135`	`150`	`unsafe { raw::normcdfinv(self) }`
`136`	`151`	`}`
`137`	`152`
	`153`	`+ #[gpu_only]`
`138`	`154`	`fn rcbrt(self) -> Self {`
`139`	`155`	`unsafe { raw::rcbrt(self) }`
`140`	`156`	`}`
`141`	`157`
	`158`	`+ #[gpu_only]`
`142`	`159`	`fn saturate(self) -> Self {`
`143`	`160`	`// this intrinsic doesnt actually exit on f64, so implement it as clamp on f64`
`144`	`161`	`self.clamp(0.0, 1.0)`
`145`	`162`	`}`
`146`	`163`
	`164`	`+ #[gpu_only]`
`147`	`165`	`fn scale_by_n(self, exp: i32) -> Self {`
`148`	`166`	`unsafe { raw::scalbn(self, exp) }`
`149`	`167`	`}`
`150`	`168`
	`169`	`+ #[gpu_only]`
`151`	`170`	`fn sincospi(self) -> (Self, Self) {`
`152`	`171`	`let mut sin = 0.0;`
`153`	`172`	`let mut cos = 0.0;`
`@@ -157,48 +176,59 @@ impl FloatExt for f64 {`
`157`	`176`	`(sin, cos)`
`158`	`177`	`}`
`159`	`178`
	`179`	`+ #[gpu_only]`
`160`	`180`	`fn sinpi(self) -> Self {`
`161`	`181`	`unsafe { raw::sinpi(self) }`
`162`	`182`	`}`
`163`	`183`
	`184`	`+ #[gpu_only]`
`164`	`185`	`fn gamma(self) -> Self {`
`165`	`186`	`unsafe { raw::tgamma(self) }`
`166`	`187`	`}`
`167`	`188`
	`189`	`+ #[gpu_only]`
`168`	`190`	`fn y0(self) -> Self {`
`169`	`191`	`unsafe { raw::y0(self) }`
`170`	`192`	`}`
`171`	`193`
	`194`	`+ #[gpu_only]`
`172`	`195`	`fn y1(self) -> Self {`
`173`	`196`	`unsafe { raw::y1(self) }`
`174`	`197`	`}`
`175`	`198`
	`199`	`+ #[gpu_only]`
`176`	`200`	`fn yn(self, order: i32) -> Self {`
`177`	`201`	`unsafe { raw::yn(order, self) }`
`178`	`202`	`}`
`179`	`203`	`}`
`180`	`204`
`181`	`205`	`impl FloatExt for f32 {`
	`206`	`+ #[gpu_only]`
`182`	`207`	`fn cospi(self) -> Self {`
`183`	`208`	`unsafe { raw::cospif(self) }`
`184`	`209`	`}`
`185`	`210`
	`211`	`+ #[gpu_only]`
`186`	`212`	`fn error_function(self) -> Self {`
`187`	`213`	`unsafe { raw::erff(self) }`
`188`	`214`	`}`
`189`	`215`
	`216`	`+ #[gpu_only]`
`190`	`217`	`fn complementary_error_function(self) -> Self {`
`191`	`218`	`unsafe { raw::erfcf(self) }`
`192`	`219`	`}`
`193`	`220`
	`221`	`+ #[gpu_only]`
`194`	`222`	`fn inv_complementary_error_function(self) -> Self {`
`195`	`223`	`unsafe { raw::erfcinvf(self) }`
`196`	`224`	`}`
`197`	`225`
	`226`	`+ #[gpu_only]`
`198`	`227`	`fn scaled_complementary_error_function(self) -> Self {`
`199`	`228`	`unsafe { raw::erfcxf(self) }`
`200`	`229`	`}`
`201`	`230`
	`231`	`+ #[gpu_only]`
`202`	`232`	`fn frexp(self) -> (Self, i32) {`
`203`	`233`	`let mut exp = 0;`
`204`	`234`	`unsafe {`
`@@ -207,54 +237,67 @@ impl FloatExt for f32 {`
`207`	`237`	`}`
`208`	`238`	`}`
`209`	`239`
	`240`	`+ #[gpu_only]`
`210`	`241`	`fn unbiased_exp(self) -> i32 {`
`211`	`242`	`unsafe { raw::ilogbf(self) }`
`212`	`243`	`}`
`213`	`244`
	`245`	`+ #[gpu_only]`
`214`	`246`	`fn j0(self) -> Self {`
`215`	`247`	`unsafe { raw::j0f(self) }`
`216`	`248`	`}`
`217`	`249`
	`250`	`+ #[gpu_only]`
`218`	`251`	`fn j1(self) -> Self {`
`219`	`252`	`unsafe { raw::j1f(self) }`
`220`	`253`	`}`
`221`	`254`
	`255`	`+ #[gpu_only]`
`222`	`256`	`fn jn(self, order: i32) -> Self {`
`223`	`257`	`unsafe { raw::jnf(order, self) }`
`224`	`258`	`}`
`225`	`259`
	`260`	`+ #[gpu_only]`
`226`	`261`	`fn ldexp(self, exp: i32) -> Self {`
`227`	`262`	`unsafe { raw::ldexpf(self, exp) }`
`228`	`263`	`}`
`229`	`264`
	`265`	`+ #[gpu_only]`
`230`	`266`	`fn log_gamma(self) -> Self {`
`231`	`267`	`unsafe { raw::lgammaf(self) }`
`232`	`268`	`}`
`233`	`269`
	`270`	`+ #[gpu_only]`
`234`	`271`	`fn log1p(self) -> Self {`
`235`	`272`	`unsafe { raw::log1pf(self) }`
`236`	`273`	`}`
`237`	`274`
	`275`	`+ #[gpu_only]`
`238`	`276`	`fn norm_cdf(self) -> Self {`
`239`	`277`	`unsafe { raw::normcdff(self) }`
`240`	`278`	`}`
`241`	`279`
	`280`	`+ #[gpu_only]`
`242`	`281`	`fn inv_norm_cdf(self) -> Self {`
`243`	`282`	`unsafe { raw::normcdfinvf(self) }`
`244`	`283`	`}`
`245`	`284`
	`285`	`+ #[gpu_only]`
`246`	`286`	`fn rcbrt(self) -> Self {`
`247`	`287`	`unsafe { raw::rcbrtf(self) }`
`248`	`288`	`}`
`249`	`289`
	`290`	`+ #[gpu_only]`
`250`	`291`	`fn saturate(self) -> Self {`
`251`	`292`	`unsafe { raw::saturatef(self) }`
`252`	`293`	`}`
`253`	`294`
	`295`	`+ #[gpu_only]`
`254`	`296`	`fn scale_by_n(self, exp: i32) -> Self {`
`255`	`297`	`unsafe { raw::scalbnf(self, exp) }`
`256`	`298`	`}`
`257`	`299`
	`300`	`+ #[gpu_only]`
`258`	`301`	`fn sincospi(self) -> (Self, Self) {`
`259`	`302`	`let mut sin = 0.0;`
`260`	`303`	`let mut cos = 0.0;`
`@@ -264,22 +307,27 @@ impl FloatExt for f32 {`
`264`	`307`	`(sin, cos)`
`265`	`308`	`}`
`266`	`309`
	`310`	`+ #[gpu_only]`
`267`	`311`	`fn sinpi(self) -> Self {`
`268`	`312`	`unsafe { raw::sinpif(self) }`
`269`	`313`	`}`
`270`	`314`
	`315`	`+ #[gpu_only]`
`271`	`316`	`fn gamma(self) -> Self {`
`272`	`317`	`unsafe { raw::tgammaf(self) }`
`273`	`318`	`}`
`274`	`319`
	`320`	`+ #[gpu_only]`
`275`	`321`	`fn y0(self) -> Self {`
`276`	`322`	`unsafe { raw::y0f(self) }`
`277`	`323`	`}`
`278`	`324`
	`325`	`+ #[gpu_only]`
`279`	`326`	`fn y1(self) -> Self {`
`280`	`327`	`unsafe { raw::y1f(self) }`
`281`	`328`	`}`
`282`	`329`
	`330`	`+ #[gpu_only]`
`283`	`331`	`fn yn(self, order: i32) -> Self {`
`284`	`332`	`unsafe { raw::ynf(order, self) }`
`285`	`333`	`}`