diff --git a/all-is-cubes-gpu/benches/wgpu.rs b/all-is-cubes-gpu/benches/wgpu.rs index 991844474..fd4b31744 100644 --- a/all-is-cubes-gpu/benches/wgpu.rs +++ b/all-is-cubes-gpu/benches/wgpu.rs @@ -18,7 +18,7 @@ use all_is_cubes_render::camera::{GraphicsOptions, StandardCameras, Viewport}; use all_is_cubes_render::Flaws; use all_is_cubes_render::HeadlessRenderer; -use all_is_cubes_gpu::in_wgpu::{headless, init, LightTexture}; +use all_is_cubes_gpu::in_wgpu::{headless, init, LightChunk, LightTexture}; fn main() { let runtime = tokio::runtime::Builder::new_multi_thread().build().unwrap(); @@ -168,11 +168,13 @@ fn light_benches(runtime: &Runtime, c: &mut Criterion, instance: &wgpu::Instance LightTexture::new("lt", &device, bounds.size(), wgpu::TextureUsages::empty()); let space = Space::builder(bounds).build(); + let updates = LightChunk::all_in_region(bounds); + // update_scatter() will do nothing if not mapped first texture.ensure_mapped(&queue, &space, bounds); b.iter_with_large_drop(|| { - texture.update_scatter(&device, &queue, &space, space.bounds().interior_iter()); + texture.update_scatter(&device, &queue, &space, updates.iter().copied()); scopeguard::guard((), |()| { // flush wgpu's buffering of copy commands (not sure if this is effective). 
diff --git a/all-is-cubes-gpu/src/in_wgpu.rs b/all-is-cubes-gpu/src/in_wgpu.rs index c3b954417..39b83c60d 100644 --- a/all-is-cubes-gpu/src/in_wgpu.rs +++ b/all-is-cubes-gpu/src/in_wgpu.rs @@ -52,7 +52,7 @@ pub mod headless; pub mod init; mod light_texture; #[doc(hidden)] // public for benchmark -pub use light_texture::LightTexture; +pub use light_texture::{LightChunk, LightTexture}; mod pipelines; mod poll; mod postprocess; diff --git a/all-is-cubes-gpu/src/in_wgpu/light_texture.rs b/all-is-cubes-gpu/src/in_wgpu/light_texture.rs index b5fcc9dc8..fa5cef8dd 100644 --- a/all-is-cubes-gpu/src/in_wgpu/light_texture.rs +++ b/all-is-cubes-gpu/src/in_wgpu/light_texture.rs @@ -1,3 +1,5 @@ +use std::array; + use cfg_if::cfg_if; #[cfg(feature = "auto-threads")] use rayon::{ @@ -7,11 +9,12 @@ use rayon::{ use all_is_cubes::math::{ Aab, Axis, Cube, FaceMap, FreeCoordinate, GridAab, GridCoordinate, GridSize, GridSizeCoord, + PositiveSign, }; use all_is_cubes::space::Space; use all_is_cubes::{ - euclid::{Box3D, Vector3D}, - math::PositiveSign, + euclid::{vec3, Box3D, Point3D, Size3D, Vector3D}, + math::VectorOps, }; use all_is_cubes_render::camera::Camera; @@ -52,6 +55,60 @@ fn visible_light_volume(space_bounds: GridAab, camera: &Camera) -> GridAab { .unwrap_or(GridAab::ORIGIN_CUBE) } +/// Size of the minimum unit in which we partially update a [`LightTexture`]. +/// This size is not visible outside this module except as the granularity of [`LightChunk`] values. +const LIGHT_CHUNK_SIZE: GridSize = GridSize::new(16, 1, 1); +#[allow(clippy::cast_possible_wrap)] +const LIGHT_CHUNK_SIZE_I32: Size3D = Size3D::new( + LIGHT_CHUNK_SIZE.width as i32, + LIGHT_CHUNK_SIZE.height as i32, + LIGHT_CHUNK_SIZE.depth as i32, +); +const LIGHT_CHUNK_VOLUME: usize = + (LIGHT_CHUNK_SIZE.width * LIGHT_CHUNK_SIZE.height * LIGHT_CHUNK_SIZE.depth) as usize; + +/// Coordinates for a chunk of light values in a [`LightTexture`] to update. +/// These are generally much smaller than mesh chunks. 
+/// +/// This may be lossily converted from a [`Cube`] to find the containing chunk. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +#[doc(hidden)] // public for benchmark +pub struct LightChunk(Point3D); + +impl LightChunk { + pub fn new(cube: Cube) -> Self { + LightChunk( + cube.lower_bounds() + .div_euclid(&LIGHT_CHUNK_SIZE_I32) + .cast_unit(), + ) + } + + pub fn first_cube(self) -> Cube { + Cube::from( + self.0 + .cast_unit::() + .to_vector() + .zip(LIGHT_CHUNK_SIZE_I32.to_vector(), |coord, scale| { + coord * scale + }) + .to_point(), + ) + } + + /// For testing only. Implemented in a brute-force way because it doesn’t need to be cheaper. + pub fn all_in_region(region: GridAab) -> Vec { + let mut chunks: Vec = region + .interior_iter() + .map(LightChunk::new) + .collect::>() // deduplicate + .into_iter() + .collect(); + chunks.sort_by_key(|chunk| <[i32; 3]>::from(chunk.first_cube())); + chunks + } +} + /// Keeps a 3D [`wgpu::Texture`] up to date with the light data from a [`Space`]. /// /// [`Space`] coordinates are mapped directly to texel coordinates, with modulo wrap-around. @@ -80,7 +137,7 @@ pub struct LightTexture { } impl LightTexture { - const COPY_BUFFER_TEXELS: usize = 1024; + const COPY_BUFFER_CHUNKS: usize = 512; const COMPONENTS: usize = 4; /// Compute the appropriate size of light texture for the given conditions. @@ -92,6 +149,7 @@ impl LightTexture { // Extra volume of 1 extra cube around all sides automatically captures sky light. let space_size = space_bounds.size() + GridSize::splat(2); + // Compute the size that we need to accommodate the camera view distance. // times 2 for radius, plus one to account for the effect of rounding up points to // containing cubes. let camera_size = GridSize::splat( @@ -103,13 +161,24 @@ impl LightTexture { // The texture need not be bigger than the Space or bigger than the viewable diameter. // But it must also be within wgpu's limits. 
- space_size.min(camera_size).clamp( - GridSize::splat(1), - GridSize::splat(limits.max_texture_dimension_3d), + let visually_needed_size = space_size.min(camera_size).max(GridSize::splat(1)); + + // Round up to a multiple of LIGHT_CHUNK_SIZE; + // this part is for the sake of the implementation of updating rather than because + // we need the data. + let chunked_size = + visually_needed_size.zip(LIGHT_CHUNK_SIZE.cast_unit(), |ss, cs| ss.div_ceil(cs) * cs); + + // Limit to wgpu limits, rounded down to chunk. + chunked_size.min( + GridSize::splat(limits.max_texture_dimension_3d) + .zip(LIGHT_CHUNK_SIZE.cast_unit(), |ss, cs| (ss / cs) * cs), ) } /// Construct a new texture of the specified size with no data. + /// + /// The size must be a size returned by [`LightTexture::choose_size()`]. pub fn new( label_prefix: &str, device: &wgpu::Device, @@ -135,7 +204,10 @@ impl LightTexture { texture, copy_buffer: device.create_buffer(&wgpu::BufferDescriptor { label: Some(&format!("{label_prefix} space light copy buffer")), - size: u64::try_from(Self::COPY_BUFFER_TEXELS * Self::COMPONENTS).unwrap(), + size: u64::try_from( + Self::COPY_BUFFER_CHUNKS * LIGHT_CHUNK_VOLUME * Self::COMPONENTS, + ) + .unwrap(), usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::COPY_SRC, mapped_at_creation: false, }), @@ -341,29 +413,43 @@ impl LightTexture { device: &wgpu::Device, queue: &wgpu::Queue, space: &Space, - cubes: impl IntoIterator, + chunks: impl IntoIterator, ) -> usize { let mut total_count = 0; let texture_size = extent_to_size3d(self.texture.size()).to_i32(); // Filter out out-of-bounds cubes. - let cubes = cubes + let chunks = chunks .into_iter() - .filter(|&cube| self.mapped_region.contains_cube(cube)); + .filter(|&chunk| self.mapped_region.contains_cube(chunk.first_cube())); // Break into batches of our buffer size. 
- for cube_batch in &itertools::Itertools::chunks(cubes, Self::COPY_BUFFER_TEXELS) { + for chunk_batch in &itertools::Itertools::chunks(chunks, Self::COPY_BUFFER_CHUNKS) { #[allow(clippy::large_stack_arrays)] - let mut data: [Texel; Self::COPY_BUFFER_TEXELS] = - [[0; Self::COMPONENTS]; Self::COPY_BUFFER_TEXELS]; + let mut data: [[Texel; LIGHT_CHUNK_VOLUME]; Self::COPY_BUFFER_CHUNKS] = + [[[0; Self::COMPONENTS]; LIGHT_CHUNK_VOLUME]; Self::COPY_BUFFER_CHUNKS]; let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: Some("space light scatter-copy"), }); let mut batch_count = 0; - for (index, cube) in cube_batch.into_iter().enumerate() { - data[index] = space.get_lighting(cube).as_texel(); + for (index_in_batch, chunk) in chunk_batch.into_iter().enumerate() { + let first_cube = chunk.first_cube(); + data[index_in_batch] = array::from_fn(|texel_index_in_chunk| { + #[allow(clippy::cast_possible_wrap)] // only as big as LIGHT_CHUNK_VOLUME + let texel_index_in_chunk = texel_index_in_chunk as i32; + let offset = vec3( + texel_index_in_chunk.rem_euclid(LIGHT_CHUNK_SIZE_I32.width), + texel_index_in_chunk + .div_euclid(LIGHT_CHUNK_SIZE_I32.width) + .rem_euclid(LIGHT_CHUNK_SIZE_I32.height), + texel_index_in_chunk + .div_euclid(LIGHT_CHUNK_SIZE_I32.width * LIGHT_CHUNK_SIZE_I32.height), + ); + + space.get_lighting(first_cube + offset).as_texel() + }); // TODO: When compute shaders are available, use a compute shader to do these // scattered writes instead of issuing individual commands. 
@@ -371,7 +457,8 @@ impl LightTexture { wgpu::ImageCopyBuffer { buffer: &self.copy_buffer, layout: wgpu::ImageDataLayout { - offset: (index * Self::COMPONENTS) as u64, + offset: (index_in_batch * (LIGHT_CHUNK_VOLUME * Self::COMPONENTS)) + as u64, bytes_per_row: None, rows_per_image: None, }, @@ -380,15 +467,11 @@ impl LightTexture { texture: &self.texture, mip_level: 0, origin: point_to_origin( - cube.lower_bounds().rem_euclid(&texture_size).to_u32(), + first_cube.lower_bounds().rem_euclid(&texture_size).to_u32(), ), aspect: wgpu::TextureAspect::All, }, - wgpu::Extent3d { - width: 1, - height: 1, - depth_or_array_layers: 1, - }, + size3d_to_extent(LIGHT_CHUNK_SIZE), ); batch_count += 1; @@ -399,7 +482,11 @@ impl LightTexture { // To do this optimally, `StagingBelt` will need to be modified to allow // us accessing its buffers to issue a `copy_buffer_to_texture` instead of // it issuing a `copy_buffer_to_buffer`. - queue.write_buffer(&self.copy_buffer, 0, data[..batch_count].as_flattened()); + queue.write_buffer( + &self.copy_buffer, + 0, + data[..batch_count].as_flattened().as_flattened(), + ); queue.submit([encoder.finish()]); } diff --git a/all-is-cubes-gpu/src/in_wgpu/space.rs b/all-is-cubes-gpu/src/in_wgpu/space.rs index 137ca648b..cc074bc32 100644 --- a/all-is-cubes-gpu/src/in_wgpu/space.rs +++ b/all-is-cubes-gpu/src/in_wgpu/space.rs @@ -11,8 +11,8 @@ use all_is_cubes::chunking::ChunkPos; use all_is_cubes::content::palette; use all_is_cubes::listen::{self, Listen as _, Listener}; use all_is_cubes::math::{ - rgba_const, Cube, Face6, FreeCoordinate, FreePoint, GridAab, GridCoordinate, GridPoint, - GridSize, GridVector, Rgb, Rgba, Wireframe as _, ZeroOne, + rgba_const, Face6, FreeCoordinate, FreePoint, GridAab, GridCoordinate, GridPoint, GridSize, + GridVector, Rgb, Rgba, Wireframe as _, ZeroOne, }; use all_is_cubes::raycast::Ray; #[cfg(feature = "rerun")] @@ -29,6 +29,7 @@ use all_is_cubes_render::{Flaws, RenderError}; use 
crate::in_wgpu::block_texture::BlockTextureViews; use crate::in_wgpu::frame_texture::FramebufferTextures; use crate::in_wgpu::glue::{to_wgpu_color, to_wgpu_index_format}; +use crate::in_wgpu::light_texture::LightChunk; use crate::in_wgpu::pipelines::Pipelines; use crate::in_wgpu::skybox; use crate::in_wgpu::vertex::{WgpuInstanceData, WgpuLinesVertex}; @@ -1041,7 +1042,7 @@ struct SpaceRendererTodo { /// None means do a full space reupload. /// /// TODO: experiment with different granularities of light invalidation (chunks, dirty rects, etc.) - light: Option>, + light: Option>, sky: bool, } @@ -1065,7 +1066,7 @@ impl listen::Store for SpaceRendererTodo { SpaceChange::CubeLight { cube } => { // None means we're already at "update everything" if let Some(set) = &mut self.light { - set.insert(cube); + set.insert(LightChunk::new(cube)); } } SpaceChange::CubeBlock { .. } => {} diff --git a/all-is-cubes-gpu/tests/shaders/tests.rs b/all-is-cubes-gpu/tests/shaders/tests.rs index ecfa1486c..35b2c9360 100644 --- a/all-is-cubes-gpu/tests/shaders/tests.rs +++ b/all-is-cubes-gpu/tests/shaders/tests.rs @@ -1,11 +1,12 @@ use std::sync::Arc; -use all_is_cubes::math::GridSize; +use all_is_cubes::math::{ps64, GridSize, Rgb}; use all_is_cubes::raycast::scale_to_integer_step; +use all_is_cubes::space::Space; use all_is_cubes::universe::Universe; use all_is_cubes::util::YieldProgress; -use all_is_cubes_gpu::in_wgpu::{init, LightTexture}; +use all_is_cubes_gpu::in_wgpu::{init, LightChunk, LightTexture}; use crate::harness::run_shader_test; use crate::wgsl::{frag_expr, to_wgsl}; @@ -77,10 +78,10 @@ async fn scale_to_integer_step_test() { /// Not a shader test per se, but a test that the light texture updates correctly. 
#[tokio::test] #[rstest::rstest] -async fn light_texture_write_read(#[values(false, true)] use_scatter: bool) { - use all_is_cubes::math::Rgb; - use all_is_cubes::space::Space; - +async fn light_texture_write_read( + #[values(false, true)] use_scatter: bool, + #[values(16, 30, 50)] space_size_param: u32, +) { let ((device, queue), (_universe, space, dark_space)) = tokio::join!( async { let instance = crate::harness::instance().await; @@ -94,10 +95,12 @@ async fn light_texture_write_read(#[values(false, true)] use_scatter: bool) { }, async { let mut universe = Universe::new(); + // TODO: the test would be more rigorous with a precise size rather than the rounding + // that lighting_bench_space() does. let space = all_is_cubes::content::testing::lighting_bench_space( &mut universe, YieldProgress::noop(), - GridSize::new(32, 32, 32), + GridSize::splat(space_size_param), ) .await .unwrap(); @@ -112,7 +115,7 @@ async fn light_texture_write_read(#[values(false, true)] use_scatter: bool) { let mut lt = LightTexture::new( "light_texture_write_test", &device, - GridSize::splat(32), + LightTexture::choose_size(&device.limits(), space.bounds(), ps64(1e6)), wgpu::TextureUsages::COPY_SRC, ); @@ -120,7 +123,12 @@ async fn light_texture_write_read(#[values(false, true)] use_scatter: bool) { // First initialize with black from dark_space, then refresh it using update_scatter(). lt.ensure_mapped(&queue, &dark_space, space.bounds()); - lt.update_scatter(&device, &queue, &space, space.bounds().interior_iter()); + lt.update_scatter( + &device, + &queue, + &space, + LightChunk::all_in_region(space.bounds()).into_iter(), + ); } else { lt.ensure_mapped(&queue, &space, space.bounds()); }