Rust-GPU · LegNeato · Mar 8, 2025 · Mar 8, 2025
diff --git a/crates/cuda_std/src/float.rs b/crates/cuda_std/src/float.rs
@@ -54,25 +54,25 @@ mod private {
 
 macro_rules! f32_intrinsic {
     ($self:expr, $func:ident($($param:expr),*)) => {{
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = $self.$func($($param),*);
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = paste::paste! { unsafe { intrinsics::[<$func f>]($self, $($param),*)} };
         val
     }};
 }
 
 macro_rules! f64_intrinsic {
     ($self:expr, $func:ident($($param:expr),*)) => {{
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = $self.$func($($param),*);
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = unsafe { intrinsics::$func($self, $($param),*)};
         val
     }};
 }
 
-#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+#[cfg(target_arch = "nvptx64")]
 use crate::intrinsics;
 
 impl GpuFloat for f32 {
@@ -117,9 +117,9 @@ impl GpuFloat for f32 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn abs(self) -> f32 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.abs();
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::fabsf(self) } };
         val
     }
@@ -161,9 +161,9 @@ impl GpuFloat for f32 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn mul_add(self, a: f32, b: f32) -> f32 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.mul_add(a, b);
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::fmaf(self, a, b) } };
         val
     }
@@ -218,9 +218,9 @@ impl GpuFloat for f32 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn powf(self, n: f32) -> f32 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.powf(n);
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::powf(self, n) } };
         val
     }
@@ -252,9 +252,9 @@ impl GpuFloat for f32 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn ln(self) -> f32 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.ln();
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::logf(self) } };
         val
     }
@@ -362,9 +362,9 @@ impl GpuFloat for f32 {
     /// `(sin(x), cos(x))`.
     #[inline]
     fn sin_cos(self) -> (f32, f32) {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.sin_cos();
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = {
             let mut sptr = 0.0;
             let mut cptr = 0.0;
@@ -381,9 +381,9 @@ impl GpuFloat for f32 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn exp_m1(self) -> f32 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.exp_m1();
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::expm1f(self) } };
         val
     }
@@ -393,9 +393,9 @@ impl GpuFloat for f32 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn ln_1p(self) -> f32 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.ln_1p();
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::log1pf(self) } };
         val
     }
@@ -485,9 +485,9 @@ impl GpuFloat for f64 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn abs(self) -> f64 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.abs();
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::fabs(self) } };
         val
     }
@@ -529,9 +529,9 @@ impl GpuFloat for f64 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn mul_add(self, a: f64, b: f64) -> f64 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.mul_add(a, b);
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::fma(self, a, b) } };
         val
     }
@@ -586,9 +586,9 @@ impl GpuFloat for f64 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn powf(self, n: f64) -> f64 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.powf(n);
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::pow(self, n) } };
         val
     }
@@ -620,9 +620,9 @@ impl GpuFloat for f64 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn ln(self) -> f64 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.ln();
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::log(self) } };
         val
     }
@@ -730,9 +730,9 @@ impl GpuFloat for f64 {
     /// `(sin(x), cos(x))`.
     #[inline]
     fn sin_cos(self) -> (f64, f64) {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.sin_cos();
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = {
             let mut sptr = 0.0;
             let mut cptr = 0.0;
@@ -749,9 +749,9 @@ impl GpuFloat for f64 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn exp_m1(self) -> f64 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.exp_m1();
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::expm1(self) } };
         val
     }
@@ -761,9 +761,9 @@ impl GpuFloat for f64 {
     #[must_use = "method returns a new number and does not mutate the original value"]
     #[inline]
     fn ln_1p(self) -> f64 {
-        #[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
+        #[cfg(not(target_arch = "nvptx64"))]
         let val = self.ln_1p();
-        #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+        #[cfg(target_arch = "nvptx64")]
         let val = { unsafe { intrinsics::log1p(self) } };
         val
     }

diff --git a/crates/cuda_std/src/mem.rs b/crates/cuda_std/src/mem.rs
@@ -1,12 +1,12 @@
 //! Support for allocating memory and using `alloc` using CUDA memory allocation system-calls.
 
 use crate::gpu_only;
-#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+#[cfg(target_arch = "nvptx64")]
 use alloc::alloc::*;
-#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+#[cfg(target_arch = "nvptx64")]
 use core::ffi::c_void;
 
-#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+#[cfg(target_arch = "nvptx64")]
 extern "C" {
     // implicitly defined by cuda.
     pub fn malloc(size: usize) -> *mut c_void;
@@ -16,7 +16,7 @@ extern "C" {
 
 pub struct CUDAAllocator;
 
-#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+#[cfg(target_arch = "nvptx64")]
 unsafe impl GlobalAlloc for CUDAAllocator {
     unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
         malloc(layout.size()) as *mut u8
@@ -26,7 +26,7 @@ unsafe impl GlobalAlloc for CUDAAllocator {
     }
 }
 
-#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
+#[cfg(target_arch = "nvptx64")]
 #[global_allocator]
 pub static GLOBAL_ALLOCATOR: CUDAAllocator = CUDAAllocator;
 

diff --git a/crates/cust/src/memory/device/device_buffer.rs b/crates/cust/src/memory/device/device_buffer.rs
@@ -314,7 +314,7 @@ impl<A: DeviceCopy + Pod> DeviceBuffer<A> {
     /// whole number of elements. Such as `3` x [`u16`] -> `1.5` x [`u32`].
     /// - If either type is a ZST (but not both).
     #[cfg_attr(docsrs, doc(cfg(feature = "bytemuck")))]
-    pub fn try_cast<B: Pod + DeviceCopy>(mut self) -> Result<DeviceBuffer<B>, PodCastError> {
+    pub fn try_cast<B: Pod + DeviceCopy>(self) -> Result<DeviceBuffer<B>, PodCastError> {
         if align_of::<B>() > align_of::<A>() && (self.buf.as_raw() as usize) % align_of::<B>() != 0
         {
             Err(PodCastError::TargetAlignmentGreaterAndInputNotAligned)
@@ -329,9 +329,7 @@ impl<A: DeviceCopy + Pod> DeviceBuffer<A> {
                 buf: self.buf.cast(),
                 len: new_len,
             });
-            unsafe {
-                std::mem::forget(self);
-            }
+            std::mem::forget(self);
             ret
         } else {
             Err(PodCastError::OutputSliceWouldHaveSlop)

diff --git a/crates/gpu_rand/src/default.rs b/crates/gpu_rand/src/default.rs
@@ -37,10 +37,6 @@ impl RngCore for DefaultRand {
     fn fill_bytes(&mut self, dest: &mut [u8]) {
         self.inner.fill_bytes(dest)
     }
-
-    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
-        self.inner.try_fill_bytes(dest)
-    }
 }
 
 impl SeedableRng for DefaultRand {

diff --git a/crates/gpu_rand/src/xoroshiro/common.rs b/crates/gpu_rand/src/xoroshiro/common.rs
@@ -2,7 +2,7 @@
 macro_rules! from_splitmix {
     ($seed:expr) => {{
         let mut rng = crate::xoroshiro::SplitMix64::seed_from_u64($seed);
-        Self::from_rng(&mut rng).unwrap()
+        Self::from_rng(&mut rng)
     }};
 }
 
@@ -327,3 +327,9 @@ impl AsMut<[u8]> for Seed512 {
         &mut self.0
     }
 }
+
+impl AsRef<[u8]> for Seed512 {
+    fn as_ref(&self) -> &[u8] {
+        &self.0
+    }
+}
diff --git a/crates/gpu_rand/src/xoroshiro/splitmix64.rs b/crates/gpu_rand/src/xoroshiro/splitmix64.rs
@@ -1,6 +1,6 @@
 use rand_core::impls::fill_bytes_via_next;
 use rand_core::le::read_u64_into;
-use rand_core::{Error, RngCore, SeedableRng};
+use rand_core::{RngCore, SeedableRng};
 
 /// A splitmix64 random number generator.
 ///
@@ -49,12 +49,6 @@ impl RngCore for SplitMix64 {
     fn fill_bytes(&mut self, dest: &mut [u8]) {
         fill_bytes_via_next(self, dest);
     }
-
-    #[inline]
-    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
-        self.fill_bytes(dest);
-        Ok(())
-    }
 }
 
 impl SeedableRng for SplitMix64 {

diff --git a/crates/gpu_rand/src/xoroshiro/xoroshiro128plus.rs b/crates/gpu_rand/src/xoroshiro/xoroshiro128plus.rs
@@ -78,12 +78,6 @@ impl RngCore for Xoroshiro128Plus {
     fn fill_bytes(&mut self, dest: &mut [u8]) {
         fill_bytes_via_next(self, dest);
     }
-
-    #[inline]
-    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
-        self.fill_bytes(dest);
-        Ok(())
-    }
 }
 
 impl SeedableRng for Xoroshiro128Plus {

diff --git a/crates/gpu_rand/src/xoroshiro/xoroshiro128plusplus.rs b/crates/gpu_rand/src/xoroshiro/xoroshiro128plusplus.rs
@@ -75,12 +75,6 @@ impl RngCore for Xoroshiro128PlusPlus {
     fn fill_bytes(&mut self, dest: &mut [u8]) {
         fill_bytes_via_next(self, dest);
     }
-
-    #[inline]
-    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
-        self.fill_bytes(dest);
-        Ok(())
-    }
 }
 
 impl SeedableRng for Xoroshiro128PlusPlus {

diff --git a/crates/gpu_rand/src/xoroshiro/xoroshiro128starstar.rs b/crates/gpu_rand/src/xoroshiro/xoroshiro128starstar.rs
@@ -75,12 +75,6 @@ impl RngCore for Xoroshiro128StarStar {
     fn fill_bytes(&mut self, dest: &mut [u8]) {
         fill_bytes_via_next(self, dest);
     }
-
-    #[inline]
-    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
-        self.fill_bytes(dest);
-        Ok(())
-    }
 }
 
 impl SeedableRng for Xoroshiro128StarStar {

diff --git a/crates/gpu_rand/src/xoroshiro/xoroshiro64star.rs b/crates/gpu_rand/src/xoroshiro/xoroshiro64star.rs
@@ -38,12 +38,6 @@ impl RngCore for Xoroshiro64Star {
     fn fill_bytes(&mut self, dest: &mut [u8]) {
         fill_bytes_via_next(self, dest);
     }
-
-    #[inline]
-    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
-        self.fill_bytes(dest);
-        Ok(())
-    }
 }
 
 impl SeedableRng for Xoroshiro64Star {

diff --git a/crates/gpu_rand/src/xoroshiro/xoroshiro64starstar.rs b/crates/gpu_rand/src/xoroshiro/xoroshiro64starstar.rs
@@ -37,12 +37,6 @@ impl RngCore for Xoroshiro64StarStar {
     fn fill_bytes(&mut self, dest: &mut [u8]) {
         fill_bytes_via_next(self, dest);
     }
-
-    #[inline]
-    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
-        self.fill_bytes(dest);
-        Ok(())
-    }
 }
 
 impl SeedableRng for Xoroshiro64StarStar {

diff --git a/crates/gpu_rand/src/xoroshiro/xoshiro128plus.rs b/crates/gpu_rand/src/xoroshiro/xoshiro128plus.rs
@@ -1,6 +1,6 @@
 use rand_core::impls::{fill_bytes_via_next, next_u64_via_u32};
 use rand_core::le::read_u32_into;
-use rand_core::{Error, RngCore, SeedableRng};
+use rand_core::{RngCore, SeedableRng};
 
 /// A xoshiro128+ random number generator.
 ///
@@ -84,12 +84,6 @@ impl RngCore for Xoshiro128Plus {
     fn fill_bytes(&mut self, dest: &mut [u8]) {
         fill_bytes_via_next(self, dest);
     }
-
-    #[inline]
-    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
-        self.fill_bytes(dest);
-        Ok(())
-    }
 }
 
 #[cfg(test)]