diff --git a/crates/cuda_std/Cargo.toml b/crates/cuda_std/Cargo.toml index 4e120f53..209929ce 100644 --- a/crates/cuda_std/Cargo.toml +++ b/crates/cuda_std/Cargo.toml @@ -8,6 +8,7 @@ repository = "https://github.com/Rust-GPU/Rust-CUDA" readme = "../../README.md" [dependencies] +glam = { version = ">=0.22", default-features = false, features = ["libm", "cuda", "bytemuck"] } vek = { version = "0.17.1", default-features = false, features = ["libm"] } cuda_std_macros = { version = "0.2", path = "../cuda_std_macros" } half = "2.4.1" diff --git a/crates/cuda_std/src/lib.rs b/crates/cuda_std/src/lib.rs index 357922b5..752c07f1 100644 --- a/crates/cuda_std/src/lib.rs +++ b/crates/cuda_std/src/lib.rs @@ -49,7 +49,9 @@ mod float_ext; pub use cuda_std_macros::*; pub use float::GpuFloat; pub use float_ext::*; +pub use glam; pub use half; +#[deprecated(note = "The `vek` module is deprecated, use `glam` instead.")] pub use vek; pub use half::{bf16, f16}; diff --git a/crates/cuda_std/src/rt/mod.rs b/crates/cuda_std/src/rt/mod.rs index 36ce621d..ed6440ab 100644 --- a/crates/cuda_std/src/rt/mod.rs +++ b/crates/cuda_std/src/rt/mod.rs @@ -152,23 +152,23 @@ impl<'a> From<&'a GridSize> for GridSize { other.clone() } } -impl From> for GridSize { - fn from(vec: vek::Vec2) -> Self { +impl From for GridSize { + fn from(vec: glam::UVec2) -> Self { GridSize::xy(vec.x, vec.y) } } -impl From> for GridSize { - fn from(vec: vek::Vec3) -> Self { +impl From for GridSize { + fn from(vec: glam::UVec3) -> Self { GridSize::xyz(vec.x, vec.y, vec.z) } } -impl From> for GridSize { - fn from(vec: vek::Vec2) -> Self { +impl From for GridSize { + fn from(vec: glam::USizeVec2) -> Self { GridSize::xy(vec.x as u32, vec.y as u32) } } -impl From> for GridSize { - fn from(vec: vek::Vec3) -> Self { +impl From for GridSize { + fn from(vec: glam::USizeVec3) -> Self { GridSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32) } } @@ -228,23 +228,23 @@ impl<'a> From<&'a BlockSize> for BlockSize { other.clone() } } -impl From> for BlockSize { - fn from(vec: vek::Vec2) -> Self { +impl From for BlockSize { + fn from(vec: glam::UVec2) -> Self { BlockSize::xy(vec.x, vec.y) } } -impl From> for BlockSize { - fn from(vec: vek::Vec3) -> Self { +impl From for BlockSize { + fn from(vec: glam::UVec3) -> Self { BlockSize::xyz(vec.x, vec.y, vec.z) } } -impl From> for BlockSize { - fn from(vec: vek::Vec2) -> Self { +impl From for BlockSize { + fn from(vec: glam::USizeVec2) -> Self { BlockSize::xy(vec.x as u32, vec.y as u32) } } -impl From> for BlockSize { - fn from(vec: vek::Vec3) -> Self { +impl From for BlockSize { + fn from(vec: glam::USizeVec3) -> Self { BlockSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32) } } diff --git a/crates/cuda_std/src/thread.rs b/crates/cuda_std/src/thread.rs index 1f70bbb5..fc40a287 100644 --- a/crates/cuda_std/src/thread.rs +++ b/crates/cuda_std/src/thread.rs @@ -19,7 +19,7 @@ // TODO: write some docs about the terms used in this module. use cuda_std_macros::gpu_only; -use vek::{Vec2, Vec3}; +use glam::{UVec2, UVec3}; // different calling conventions dont exist in nvptx, so we just use C as a placeholder. extern "C" { @@ -152,7 +152,7 @@ pub fn grid_dim_z() -> u32 { /// Gets the 3d index of the thread currently executing the kernel. #[gpu_only] #[inline(always)] -pub fn thread_idx() -> Vec3 { +pub fn thread_idx() -> UVec3 { unsafe { Vec3::new( __nvvm_thread_idx_x(), @@ -165,7 +165,7 @@ pub fn thread_idx() -> Vec3 { /// Gets the 3d index of the block that the thread currently executing the kernel is located in. #[gpu_only] #[inline(always)] -pub fn block_idx() -> Vec3 { +pub fn block_idx() -> UVec3 { unsafe { Vec3::new( __nvvm_block_idx_x(), @@ -179,7 +179,7 @@ pub fn block_idx() -> Vec3 { /// how many threads exist in each thread block in every direction. #[gpu_only] #[inline(always)] -pub fn block_dim() -> Vec3 { +pub fn block_dim() -> UVec3 { unsafe { Vec3::new( __nvvm_block_dim_x(), @@ -193,7 +193,7 @@ pub fn block_dim() -> Vec3 { /// how many thread blocks exist in each grid in every direction. #[gpu_only] #[inline(always)] -pub fn grid_dim() -> Vec3 { +pub fn grid_dim() -> UVec3 { unsafe { Vec3::new( __nvvm_grid_dim_x(), @@ -232,18 +232,18 @@ pub fn index_1d() -> u32 { } #[inline(always)] -pub fn index_2d() -> Vec2 { +pub fn index_2d() -> UVec2 { let i = thread_idx_x() + block_idx_x() * block_dim_x(); let j = thread_idx_y() + block_idx_y() * block_dim_y(); - Vec2::new(i, j) + UVec2::new(i, j) } #[inline(always)] -pub fn index_3d() -> Vec3 { +pub fn index_3d() -> UVec3 { let i = thread_idx_x() + block_idx_x() * block_dim_x(); let j = thread_idx_y() + block_idx_y() * block_dim_y(); let k = thread_idx_z() + block_idx_z() * block_dim_z(); - Vec3::new(i, j, k) + UVec3::new(i, j, k) } /// Whether this is the first thread (not the first thread to be executing). This function is guaranteed @@ -251,7 +251,7 @@ pub fn index_3d() -> Vec3 { /// once. #[inline(always)] pub fn first() -> bool { - block_idx() == Vec3::zero() && thread_idx() == Vec3::zero() + block_idx() == UVec3::ZERO && thread_idx() == UVec3::ZERO } /// Gets the number of threads inside of a warp. Currently 32 threads on every GPU architecture. diff --git a/crates/cust/CHANGELOG.md b/crates/cust/CHANGELOG.md index 4dac02dc..904995ab 100644 --- a/crates/cust/CHANGELOG.md +++ b/crates/cust/CHANGELOG.md @@ -4,6 +4,7 @@ Notable changes to this project will be documented in this file. ## Unreleased +- `cuda_std::vek` is now deprecated. Use `cuda_std::glam`. - Add `memory::memcpy_dtoh` to allow copying from device to host. - `DeviceSlice` is represented as a slice again, but as `[()]` instead of `[T]`. - Reimplemented `Index` and `IndexMut` for `DeviceSlice` and removed `DeviceSlice::index`. diff --git a/crates/optix_device/Cargo.toml b/crates/optix_device/Cargo.toml index 2252eb41..f1f6575f 100644 --- a/crates/optix_device/Cargo.toml +++ b/crates/optix_device/Cargo.toml @@ -2,12 +2,17 @@ name = "optix_device" version = "0.1.0" edition = "2021" -authors = ["Anders Langlands ", "Riccardo D'Ambrosio "] +authors = [ + "Anders Langlands ", + "Riccardo D'Ambrosio " +] [dependencies] bitflags = "2.8" cuda_std = { version = "0.2", path = "../cuda_std" } -glam = { version = "0.29", features=["cuda", "libm"], default-features=false } paste = "1.0.15" seq-macro = "0.3.5" cust_core = { version = "0.1", path = "../cust_core" } + +[target.'cfg(not(target_os = "cuda"))'.dependencies] +glam = { version = "0.29", features = ["cuda"], default-features = false } diff --git a/crates/optix_device/src/hit.rs b/crates/optix_device/src/hit.rs index a907f004..27abe78e 100644 --- a/crates/optix_device/src/hit.rs +++ b/crates/optix_device/src/hit.rs @@ -1,6 +1,6 @@ #[cfg(target_os = "cuda")] use core::arch::asm; -use cuda_std::gpu_only; +use cuda_std::{glam, gpu_only}; use glam::Vec3; /// The type of primitive that a ray hit. #[repr(u32)] diff --git a/crates/optix_device/src/lib.rs b/crates/optix_device/src/lib.rs index 76c0539f..c13f28ad 100644 --- a/crates/optix_device/src/lib.rs +++ b/crates/optix_device/src/lib.rs @@ -14,8 +14,7 @@ pub mod trace; pub mod transform; pub mod util; -use cuda_std::*; -pub use glam; +use cuda_std::{glam, *}; use glam::UVec3; pub use misc::*; diff --git a/crates/optix_device/src/ray.rs b/crates/optix_device/src/ray.rs index 48939270..258ca99d 100644 --- a/crates/optix_device/src/ray.rs +++ b/crates/optix_device/src/ray.rs @@ -1,7 +1,7 @@ use crate::trace::*; #[cfg(target_os = "cuda")] use core::arch::asm; -use cuda_std::gpu_only; +use cuda_std::{glam, gpu_only}; use glam::Vec3; /// Returns the ray origin that was passed into [`trace`] in world-space. diff --git a/crates/optix_device/src/sys.rs b/crates/optix_device/src/sys.rs index b8c72713..946a0293 100644 --- a/crates/optix_device/src/sys.rs +++ b/crates/optix_device/src/sys.rs @@ -3,7 +3,7 @@ use crate::trace::{RayFlags, TraversableHandle}; #[cfg(target_os = "cuda")] use core::arch::asm; -use cuda_std::gpu_only; +use cuda_std::{glam, gpu_only}; use glam::Vec3; use paste::paste; diff --git a/examples/cuda/cpu/path_tracer/Cargo.toml b/examples/cuda/cpu/path_tracer/Cargo.toml index 763ba0e5..8f3bac02 100644 --- a/examples/cuda/cpu/path_tracer/Cargo.toml +++ b/examples/cuda/cpu/path_tracer/Cargo.toml @@ -4,9 +4,9 @@ version = "0.1.0" edition = "2018" [dependencies] -vek = { version = "0.17.1", features = ["bytemuck", "mint"] } +glam = { version = "0.30.1", features = ["bytemuck", "cuda"] } bytemuck = { version = "1.21", features = ["derive"] } -cust = { version = "0.3", path = "../../../../crates/cust", features = ["impl_vek"] } +cust = { version = "0.3", path = "../../../../crates/cust", features = ["impl_glam"] } image = "0.25.5" path_tracer_gpu = { path = "../../gpu/path_tracer_gpu" } gpu_rand = { version = "0.1", path = "../../../../crates/gpu_rand" } diff --git a/examples/cuda/cpu/path_tracer/src/common.rs b/examples/cuda/cpu/path_tracer/src/common.rs index bfe69df0..4eea72ea 100644 --- a/examples/cuda/cpu/path_tracer/src/common.rs +++ b/examples/cuda/cpu/path_tracer/src/common.rs @@ -1,14 +1,14 @@ +use glam::{Vec2, Vec3}; use glium::glutin::event::{ ElementState, Event, MouseButton, MouseScrollDelta, VirtualKeyCode, WindowEvent, }; use path_tracer_gpu::Viewport; -use vek::{Vec2, Vec3}; #[derive(Debug, Clone, Copy, PartialEq)] pub struct Camera { - pub origin: Vec3, - pub lookat: Vec3, - pub vup: Vec3, + pub origin: Vec3, + pub lookat: Vec3, + pub vup: Vec3, pub fov: f32, pub aspect_ratio: f32, } @@ -43,7 +43,7 @@ pub struct CameraController { } impl CameraController { - pub fn new(dimensions: Vec2) -> Self { + pub fn new(dimensions: USizeVec2) -> Self { CameraController { sensitivity: 0.1, last_mouse_pos: dimensions.numcast().unwrap() / 2.0, diff --git a/examples/cuda/cpu/path_tracer/src/cpu/mod.rs b/examples/cuda/cpu/path_tracer/src/cpu/mod.rs index 217bde5e..1f995550 100644 --- a/examples/cuda/cpu/path_tracer/src/cpu/mod.rs +++ b/examples/cuda/cpu/path_tracer/src/cpu/mod.rs @@ -1,5 +1,6 @@ use std::time::Duration; +use glam::{Clamp, Vec2, Vec3}; use gpu_rand::{DefaultRand, GpuRand}; use imgui::Ui; use path_tracer_gpu::{ @@ -7,13 +8,12 @@ use path_tracer_gpu::{ }; use rayon::prelude::*; use sysinfo::System; -use vek::{Clamp, Vec2, Vec3}; use crate::{common::Camera, cuda::SEED}; pub struct CpuRenderer { // this is basically the cuda buffers but not gpu buffers. - accumulated_buffer: Vec>, + accumulated_buffer: Vec, out_buffer: Vec>, viewport: Viewport, @@ -23,7 +23,7 @@ pub struct CpuRenderer { } impl CpuRenderer { - pub fn new(dimensions: Vec2, camera: &Camera, scene: &Scene) -> Self { + pub fn new(dimensions: USizeVec2, camera: &Camera, scene: &Scene) -> Self { let accumulated_buffer = vec![Vec3::zero(); dimensions.product()]; let out_buffer = vec![Vec3::zero(); dimensions.product()]; @@ -67,7 +67,7 @@ impl CpuRenderer { new_camera.as_viewport(&mut self.viewport); } - pub fn resize(&mut self, dimensions: Vec2) { + pub fn resize(&mut self, dimensions: USizeVec2) { self.accumulated_buffer .resize(dimensions.product(), Vec3::zero()); self.out_buffer.resize(dimensions.product(), Vec3::zero()); diff --git a/examples/cuda/cpu/path_tracer/src/cuda/data.rs b/examples/cuda/cpu/path_tracer/src/cuda/data.rs index 9e71cef6..a253aba9 100644 --- a/examples/cuda/cpu/path_tracer/src/cuda/data.rs +++ b/examples/cuda/cpu/path_tracer/src/cuda/data.rs @@ -5,9 +5,9 @@ use cust::{ memory::{DeviceBuffer, DeviceCopy, UnifiedBuffer}, util::SliceExt, }; +use glam::{Vec2, Vec3}; use gpu_rand::DefaultRand; use path_tracer_gpu::{material::MaterialKind, scene::Scene, Object, Viewport}; -use vek::{Vec2, Vec3}; use super::SEED; diff --git a/examples/cuda/cpu/path_tracer/src/main.rs b/examples/cuda/cpu/path_tracer/src/main.rs index 4311c172..070634ba 100644 --- a/examples/cuda/cpu/path_tracer/src/main.rs +++ b/examples/cuda/cpu/path_tracer/src/main.rs @@ -6,6 +6,7 @@ pub mod renderer; pub mod viewer; use common::Camera; +use glam::Vec3; use path_tracer_gpu::{ material::{DielectricMaterial, DiffuseMaterial, MaterialKind, MetallicMaterial}, scene::Scene, @@ -13,7 +14,6 @@ use path_tracer_gpu::{ Object, }; use std::error::Error; -use vek::Vec3; pub const WIDTH: u32 = 1920; pub const HEIGHT: u32 = 1080; diff --git a/examples/cuda/gpu/path_tracer_gpu/src/lib.rs b/examples/cuda/gpu/path_tracer_gpu/src/lib.rs index 51614f77..c3a0e578 100644 --- a/examples/cuda/gpu/path_tracer_gpu/src/lib.rs +++ b/examples/cuda/gpu/path_tracer_gpu/src/lib.rs @@ -11,20 +11,19 @@ pub mod render_kernels; pub mod scene; pub mod sphere; -pub use cuda_std::vek; +pub use cuda_std::glam; use cust_core::DeviceCopy; use enum_dispatch::enum_dispatch; use hittable::{HitRecord, Hittable}; use sphere::Sphere; -pub type Vec3 = vek::Vec3; -pub type Point = vek::Vec3; -pub type Vec2 = vek::Vec2; +use glam::{USizeVec2, Vec2, Vec3}; +pub type Point = Vec3; #[derive(Default, Clone, Copy, DeviceCopy)] #[repr(C)] pub struct Viewport { - pub bounds: vek::Vec2, + pub bounds: USizeVec2, pub lower_left: Vec3, pub horizontal: Vec3, pub vertical: Vec3, diff --git a/examples/cuda/gpu/path_tracer_gpu/src/render.rs b/examples/cuda/gpu/path_tracer_gpu/src/render.rs index 9767b4fd..c4fa7303 100644 --- a/examples/cuda/gpu/path_tracer_gpu/src/render.rs +++ b/examples/cuda/gpu/path_tracer_gpu/src/render.rs @@ -8,7 +8,7 @@ pub fn color(ray: Ray) -> Vec3 { (1.0 - t) * Vec3::one() + t * Vec3::new(0.5, 0.7, 1.0) } -pub fn generate_ray(idx: vek::Vec2, view: &Viewport, offset: Vec2) -> Ray { +pub fn generate_ray(idx: UVec2, view: &Viewport, offset: Vec2) -> Ray { let uv = (idx.numcast::().unwrap() + offset) / view.bounds.numcast().unwrap(); Ray { origin: view.origin, diff --git a/examples/cuda/gpu/path_tracer_gpu/src/render_kernels.rs b/examples/cuda/gpu/path_tracer_gpu/src/render_kernels.rs index ef7d8d96..a036a884 100644 --- a/examples/cuda/gpu/path_tracer_gpu/src/render_kernels.rs +++ b/examples/cuda/gpu/path_tracer_gpu/src/render_kernels.rs @@ -1,5 +1,6 @@ use crate::{render::*, scene::Scene, *}; -use cuda_std::{vek::Clamp, *}; +use cuda_std::*; +use glam::{U8Vec3, Vec2, Vec3}; use gpu_rand::{DefaultRand, GpuRand}; #[kernel] @@ -38,7 +39,7 @@ pub unsafe fn scale_buffer(fb: *const Vec3, out: *mut Vec3, samples: u32, view: /// Postprocesses a (scaled) buffer into a final u8 buffer. #[kernel] -pub unsafe fn postprocess(fb: *const Vec3, out: *mut vek::Vec3, view: Viewport) { +pub unsafe fn postprocess(fb: *const Vec3, out: *mut U8Vec3, view: Viewport) { let idx_2d = thread::index_2d(); if idx_2d.x >= view.bounds.x as u32 || idx_2d.y >= view.bounds.y as u32 { return; @@ -50,7 +51,7 @@ pub unsafe fn postprocess(fb: *const Vec3, out: *mut vek::Vec3, view: Viewpo let gamma_corrected = original.sqrt(); *out = (gamma_corrected * 255.0) - .clamped(Vec3::zero(), Vec3::broadcast(255.0)) + .clamp(Vec3::zero(), Vec3::broadcast(255.0)) .numcast() .unwrap(); }