Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 33 additions & 33 deletions crates/cuda_std/src/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,25 +54,25 @@ mod private {

// Dispatches an `f32` math call: `f32_intrinsic!(x, name(args...))`.
//
// The two `cfg` guards are exact complements, so exactly one `val` binding is
// compiled on every target.
macro_rules! f32_intrinsic {
    ($self:expr, $func:ident($($param:expr),*)) => {{
        // Anything that is not nvptx64: call the method of the same name on the value.
        #[cfg(not(target_arch = "nvptx64"))]
        let val = $self.$func($($param),*);
        // nvptx64: call `intrinsics::<name>f` (paste appends the `f` suffix),
        // passing the receiver as the first argument.
        #[cfg(target_arch = "nvptx64")]
        let val = paste::paste! { unsafe { intrinsics::[<$func f>]($self, $($param),*)} };
        val
    }};
}

// Dispatches an `f64` math call: `f64_intrinsic!(x, name(args...))`.
//
// The two `cfg` guards are exact complements, so exactly one `val` binding is
// compiled on every target.
macro_rules! f64_intrinsic {
    ($self:expr, $func:ident($($param:expr),*)) => {{
        // Anything that is not nvptx64: call the method of the same name on the value.
        #[cfg(not(target_arch = "nvptx64"))]
        let val = $self.$func($($param),*);
        // nvptx64: call `intrinsics::<name>` with the receiver as the first argument.
        #[cfg(target_arch = "nvptx64")]
        let val = unsafe { intrinsics::$func($self, $($param),*)};
        val
    }};
}

#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
#[cfg(target_arch = "nvptx64")]
use crate::intrinsics;

impl GpuFloat for f32 {
Expand Down Expand Up @@ -117,9 +117,9 @@ impl GpuFloat for f32 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn abs(self) -> f32 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `abs` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.abs();
    // nvptx64 builds call the `fabsf` intrinsic.
    // NOTE(review): `intrinsics::fabsf` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::fabsf(self) };
    val
}
Expand Down Expand Up @@ -161,9 +161,9 @@ impl GpuFloat for f32 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn mul_add(self, a: f32, b: f32) -> f32 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `mul_add` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.mul_add(a, b);
    // nvptx64 builds call the `fmaf` intrinsic with the same argument order.
    // NOTE(review): `intrinsics::fmaf` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::fmaf(self, a, b) };
    val
}
Expand Down Expand Up @@ -218,9 +218,9 @@ impl GpuFloat for f32 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn powf(self, n: f32) -> f32 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `powf` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.powf(n);
    // nvptx64 builds call the `powf` intrinsic.
    // NOTE(review): `intrinsics::powf` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::powf(self, n) };
    val
}
Expand Down Expand Up @@ -252,9 +252,9 @@ impl GpuFloat for f32 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn ln(self) -> f32 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `ln` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.ln();
    // nvptx64 builds call the `logf` intrinsic.
    // NOTE(review): `intrinsics::logf` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::logf(self) };
    val
}
Expand Down Expand Up @@ -362,9 +362,9 @@ impl GpuFloat for f32 {
/// `(sin(x), cos(x))`.
#[inline]
fn sin_cos(self) -> (f32, f32) {
#[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
#[cfg(not(target_arch = "nvptx64"))]
let val = self.sin_cos();
#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
#[cfg(target_arch = "nvptx64")]
let val = {
let mut sptr = 0.0;
let mut cptr = 0.0;
Expand All @@ -381,9 +381,9 @@ impl GpuFloat for f32 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn exp_m1(self) -> f32 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `exp_m1` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.exp_m1();
    // nvptx64 builds call the `expm1f` intrinsic.
    // NOTE(review): `intrinsics::expm1f` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::expm1f(self) };
    val
}
Expand All @@ -393,9 +393,9 @@ impl GpuFloat for f32 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn ln_1p(self) -> f32 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `ln_1p` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.ln_1p();
    // nvptx64 builds call the `log1pf` intrinsic.
    // NOTE(review): `intrinsics::log1pf` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::log1pf(self) };
    val
}
Expand Down Expand Up @@ -485,9 +485,9 @@ impl GpuFloat for f64 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn abs(self) -> f64 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `abs` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.abs();
    // nvptx64 builds call the `fabs` intrinsic.
    // NOTE(review): `intrinsics::fabs` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::fabs(self) };
    val
}
Expand Down Expand Up @@ -529,9 +529,9 @@ impl GpuFloat for f64 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn mul_add(self, a: f64, b: f64) -> f64 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `mul_add` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.mul_add(a, b);
    // nvptx64 builds call the `fma` intrinsic with the same argument order.
    // NOTE(review): `intrinsics::fma` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::fma(self, a, b) };
    val
}
Expand Down Expand Up @@ -586,9 +586,9 @@ impl GpuFloat for f64 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn powf(self, n: f64) -> f64 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `powf` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.powf(n);
    // nvptx64 builds call the `pow` intrinsic.
    // NOTE(review): `intrinsics::pow` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::pow(self, n) };
    val
}
Expand Down Expand Up @@ -620,9 +620,9 @@ impl GpuFloat for f64 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn ln(self) -> f64 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `ln` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.ln();
    // nvptx64 builds call the `log` intrinsic.
    // NOTE(review): `intrinsics::log` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::log(self) };
    val
}
Expand Down Expand Up @@ -730,9 +730,9 @@ impl GpuFloat for f64 {
/// `(sin(x), cos(x))`.
#[inline]
fn sin_cos(self) -> (f64, f64) {
#[cfg(not(any(target_arch = "nvptx", target_arch = "nvptx64")))]
#[cfg(not(target_arch = "nvptx64"))]
let val = self.sin_cos();
#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
#[cfg(target_arch = "nvptx64")]
let val = {
let mut sptr = 0.0;
let mut cptr = 0.0;
Expand All @@ -749,9 +749,9 @@ impl GpuFloat for f64 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn exp_m1(self) -> f64 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `exp_m1` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.exp_m1();
    // nvptx64 builds call the `expm1` intrinsic.
    // NOTE(review): `intrinsics::expm1` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::expm1(self) };
    val
}
Expand All @@ -761,9 +761,9 @@ impl GpuFloat for f64 {
#[must_use = "method returns a new number and does not mutate the original value"]
#[inline]
fn ln_1p(self) -> f64 {
    // The guards are exact complements, so exactly one `val` exists per target.
    // Non-nvptx64 builds use the float's own `ln_1p` method.
    #[cfg(not(target_arch = "nvptx64"))]
    let val = self.ln_1p();
    // nvptx64 builds call the `log1p` intrinsic.
    // NOTE(review): `intrinsics::log1p` is declared outside this view; presumably a
    // by-value binding with no extra preconditions — confirm.
    #[cfg(target_arch = "nvptx64")]
    let val = unsafe { intrinsics::log1p(self) };
    val
}
Expand Down
10 changes: 5 additions & 5 deletions crates/cuda_std/src/mem.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
//! Support for allocating memory and using `alloc` using CUDA memory allocation system-calls.

use crate::gpu_only;
#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
#[cfg(target_arch = "nvptx64")]
use alloc::alloc::*;
#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
#[cfg(target_arch = "nvptx64")]
use core::ffi::c_void;

#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
#[cfg(target_arch = "nvptx64")]
extern "C" {
// implicitly defined by cuda.
pub fn malloc(size: usize) -> *mut c_void;
Expand All @@ -16,7 +16,7 @@ extern "C" {

/// Unit type used as the crate's allocator handle.
///
/// When compiled for `nvptx64` it implements `GlobalAlloc` by forwarding to the
/// CUDA-provided `malloc` declared in this module, and it is registered as the
/// `#[global_allocator]`. On other targets it is just an empty marker type.
pub struct CUDAAllocator;

#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64"))]
#[cfg(target_arch = "nvptx64")]
unsafe impl GlobalAlloc for CUDAAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
malloc(layout.size()) as *mut u8
Expand All @@ -26,7 +26,7 @@ unsafe impl GlobalAlloc for CUDAAllocator {
}
}

// Registers `CUDAAllocator` as the global allocator, on nvptx64 builds only.
// A single `nvptx64` guard is equivalent to stacking it under
// `any(nvptx, nvptx64)`, since both attributes had to hold.
#[cfg(target_arch = "nvptx64")]
#[global_allocator]
pub static GLOBAL_ALLOCATOR: CUDAAllocator = CUDAAllocator;

Expand Down
6 changes: 2 additions & 4 deletions crates/cust/src/memory/device/device_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ impl<A: DeviceCopy + Pod> DeviceBuffer<A> {
/// whole number of elements. Such as `3` x [`u16`] -> `1.5` x [`u32`].
/// - If either type is a ZST (but not both).
#[cfg_attr(docsrs, doc(cfg(feature = "bytemuck")))]
pub fn try_cast<B: Pod + DeviceCopy>(mut self) -> Result<DeviceBuffer<B>, PodCastError> {
pub fn try_cast<B: Pod + DeviceCopy>(self) -> Result<DeviceBuffer<B>, PodCastError> {
if align_of::<B>() > align_of::<A>() && (self.buf.as_raw() as usize) % align_of::<B>() != 0
{
Err(PodCastError::TargetAlignmentGreaterAndInputNotAligned)
Expand All @@ -329,9 +329,7 @@ impl<A: DeviceCopy + Pod> DeviceBuffer<A> {
buf: self.buf.cast(),
len: new_len,
});
unsafe {
std::mem::forget(self);
}
std::mem::forget(self);
ret
} else {
Err(PodCastError::OutputSliceWouldHaveSlop)
Expand Down
4 changes: 0 additions & 4 deletions crates/gpu_rand/src/default.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ impl RngCore for DefaultRand {
// Fills `dest` by forwarding to the wrapped generator in `self.inner`.
fn fill_bytes(&mut self, dest: &mut [u8]) {
self.inner.fill_bytes(dest)
}

// Fallible counterpart of `fill_bytes`: forwards to `self.inner.try_fill_bytes`
// and returns its result unchanged.
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
self.inner.try_fill_bytes(dest)
}
}

impl SeedableRng for DefaultRand {
Expand Down
8 changes: 7 additions & 1 deletion crates/gpu_rand/src/xoroshiro/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
macro_rules! from_splitmix {
    ($seed:expr) => {{
        // Seed a SplitMix64 generator from the 64-bit value, then build `Self`
        // from that generator via `from_rng`.
        let mut rng = crate::xoroshiro::SplitMix64::seed_from_u64($seed);
        // Exactly one tail expression: `from_rng` is used in its infallible form
        // (returns `Self` directly), so there is nothing to unwrap.
        // NOTE(review): assumes the rand_core version in use has the infallible
        // `SeedableRng::from_rng` — confirm against Cargo.toml.
        Self::from_rng(&mut rng)
    }};
}

Expand Down Expand Up @@ -327,3 +327,9 @@ impl AsMut<[u8]> for Seed512 {
&mut self.0
}
}

// Read-only byte view of the seed; the shared-reference counterpart of the
// `AsMut<[u8]>` impl for the same type.
impl AsRef<[u8]> for Seed512 {
// Borrows the wrapped field `self.0` as a byte slice.
fn as_ref(&self) -> &[u8] {
&self.0
}
}
8 changes: 1 addition & 7 deletions crates/gpu_rand/src/xoroshiro/splitmix64.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use rand_core::impls::fill_bytes_via_next;
use rand_core::le::read_u64_into;
use rand_core::{Error, RngCore, SeedableRng};
use rand_core::{RngCore, SeedableRng};

/// A splitmix64 random number generator.
///
Expand Down Expand Up @@ -49,12 +49,6 @@ impl RngCore for SplitMix64 {
// Fills `dest` using rand_core's `fill_bytes_via_next` helper, driven by this generator.
fn fill_bytes(&mut self, dest: &mut [u8]) {
fill_bytes_via_next(self, dest);
}

// Fallible-signature wrapper around `fill_bytes`; it always returns `Ok(())`.
#[inline]
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
self.fill_bytes(dest);
Ok(())
}
}

impl SeedableRng for SplitMix64 {
Expand Down
6 changes: 0 additions & 6 deletions crates/gpu_rand/src/xoroshiro/xoroshiro128plus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,6 @@ impl RngCore for Xoroshiro128Plus {
// Fills `dest` by passing this generator to the `fill_bytes_via_next` helper.
fn fill_bytes(&mut self, dest: &mut [u8]) {
fill_bytes_via_next(self, dest);
}

// Fallible-signature wrapper around `fill_bytes`; it always returns `Ok(())`.
#[inline]
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
self.fill_bytes(dest);
Ok(())
}
}

impl SeedableRng for Xoroshiro128Plus {
Expand Down
6 changes: 0 additions & 6 deletions crates/gpu_rand/src/xoroshiro/xoroshiro128plusplus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,6 @@ impl RngCore for Xoroshiro128PlusPlus {
// Fills `dest` by passing this generator to the `fill_bytes_via_next` helper.
fn fill_bytes(&mut self, dest: &mut [u8]) {
fill_bytes_via_next(self, dest);
}

// Fallible-signature wrapper around `fill_bytes`; it always returns `Ok(())`.
#[inline]
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
self.fill_bytes(dest);
Ok(())
}
}

impl SeedableRng for Xoroshiro128PlusPlus {
Expand Down
6 changes: 0 additions & 6 deletions crates/gpu_rand/src/xoroshiro/xoroshiro128starstar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,6 @@ impl RngCore for Xoroshiro128StarStar {
// Fills `dest` by passing this generator to the `fill_bytes_via_next` helper.
fn fill_bytes(&mut self, dest: &mut [u8]) {
fill_bytes_via_next(self, dest);
}

// Fallible-signature wrapper around `fill_bytes`; it always returns `Ok(())`.
#[inline]
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
self.fill_bytes(dest);
Ok(())
}
}

impl SeedableRng for Xoroshiro128StarStar {
Expand Down
6 changes: 0 additions & 6 deletions crates/gpu_rand/src/xoroshiro/xoroshiro64star.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,6 @@ impl RngCore for Xoroshiro64Star {
// Fills `dest` by passing this generator to the `fill_bytes_via_next` helper.
fn fill_bytes(&mut self, dest: &mut [u8]) {
fill_bytes_via_next(self, dest);
}

// Fallible-signature wrapper around `fill_bytes`; it always returns `Ok(())`.
#[inline]
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
self.fill_bytes(dest);
Ok(())
}
}

impl SeedableRng for Xoroshiro64Star {
Expand Down
6 changes: 0 additions & 6 deletions crates/gpu_rand/src/xoroshiro/xoroshiro64starstar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,6 @@ impl RngCore for Xoroshiro64StarStar {
// Fills `dest` by passing this generator to the `fill_bytes_via_next` helper.
fn fill_bytes(&mut self, dest: &mut [u8]) {
fill_bytes_via_next(self, dest);
}

// Fallible-signature wrapper around `fill_bytes`; it always returns `Ok(())`.
#[inline]
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
self.fill_bytes(dest);
Ok(())
}
}

impl SeedableRng for Xoroshiro64StarStar {
Expand Down
8 changes: 1 addition & 7 deletions crates/gpu_rand/src/xoroshiro/xoshiro128plus.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use rand_core::impls::{fill_bytes_via_next, next_u64_via_u32};
use rand_core::le::read_u32_into;
use rand_core::{Error, RngCore, SeedableRng};
use rand_core::{RngCore, SeedableRng};

/// A xoshiro128+ random number generator.
///
Expand Down Expand Up @@ -84,12 +84,6 @@ impl RngCore for Xoshiro128Plus {
// Fills `dest` using rand_core's `fill_bytes_via_next` helper, driven by this generator.
fn fill_bytes(&mut self, dest: &mut [u8]) {
fill_bytes_via_next(self, dest);
}

// Fallible-signature wrapper around `fill_bytes`; it always returns `Ok(())`.
#[inline]
fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
self.fill_bytes(dest);
Ok(())
}
}

#[cfg(test)]
Expand Down
Loading
Loading