From ad9ef6f018138c68da5a6e151966f2c8aa8e193f Mon Sep 17 00:00:00 2001 From: Anton Suprunchuk Date: Thu, 2 Apr 2026 23:29:36 -0300 Subject: [PATCH 1/2] feat: partial tree updates --- Cargo.lock | 204 +++++++++++++++++++++++- Cargo.toml | 7 +- benches/renderer_partial_updates.rs | 237 ++++++++++++++++++++++++++++ src/renderer/draw_queue.rs | 65 ++++++++ src/renderer/types.rs | 7 + 5 files changed, 515 insertions(+), 5 deletions(-) create mode 100644 benches/renderer_partial_updates.rs diff --git a/Cargo.lock b/Cargo.lock index 5193b3a..a9535e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,6 +94,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.14" @@ -355,6 +361,12 @@ dependencies = [ "wayland-client", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.0.98" @@ -394,6 +406,58 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" + [[package]] name = "codespan-reporting" version = "0.12.0" @@ -485,6 +549,42 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -616,9 +716,9 @@ dependencies = [ [[package]] name = "easy-tree" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f560da9bebb976828efaccac969205c68852b12cf9f88d3a4a6bcd8f7fca34" +checksum = "0fe19fbb9a76b6acd96ed1ff3ab680675158b88a6b16f5eb988c475f2d584c84" [[package]] name = "either" @@ -972,6 +1072,7 @@ version = "0.15.0" dependencies = [ "ahash", "bytemuck", + "criterion", "easy-tree", "env_logger", "euclid", @@ -1033,6 +1134,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hexf-parse" version = "0.2.1" @@ -1105,12 +1212,32 @@ dependencies = [ "syn", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi 0.5.2", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.12.1" @@ -1120,6 +1247,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + [[package]] name = "jni" version = "0.21.1" @@ -1851,6 +1984,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "orbclient" version = "0.3.47" @@ -1951,6 +2090,34 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "png" version = "0.17.13" @@ -1972,7 +2139,7 @@ checksum = "a3ed00ed3fbf728b5816498ecd316d1716eecaced9c0c8d2c5a6740ca214985b" dependencies = [ "cfg-if", "concurrent-queue", - "hermit-abi", + "hermit-abi 0.4.0", "pin-project-lite", "rustix 0.38.44", "tracing", @@ -2120,7 +2287,7 @@ dependencies = [ "built", "cfg-if", "interpolate_name", - "itertools", + "itertools 0.12.1", "libc", "libfuzzer-sys", "log", @@ -2343,6 +2510,19 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + [[package]] name = "serde_spanned" version = "0.6.9" @@ -2637,6 +2817,16 @@ dependencies = [ "tracing", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "toml" version = "0.8.15" @@ -3606,6 +3796,12 @@ dependencies = [ "syn", ] +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + [[package]] name = "zune-core" version = "0.4.12" diff --git a/Cargo.toml b/Cargo.toml index e558450..9551067 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ bytemuck = "1.23" wgpu = "25.0" ahash = "0.8" lyon = { version = "1.0"} -easy-tree = { version = "0.3" } +easy-tree = { version = "0.4" } tracing = "0.1.44" lru = "0.15" regex = "1" @@ -34,8 +34,13 @@ futures = "0.3" image = "0.25" transformator = "0.1.1" euclid = "0.22.11" +criterion = "0.5" grafo-test-scenes = { path = "grafo-test-scenes" } [features] performance_measurement = [] render_metrics = [] + +[[bench]] +name = "renderer_partial_updates" +harness = false diff --git a/benches/renderer_partial_updates.rs b/benches/renderer_partial_updates.rs new file mode 100644 index 0000000..99ea32d --- /dev/null +++ b/benches/renderer_partial_updates.rs @@ -0,0 +1,237 @@ +use std::time::Duration; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use futures::executor::block_on; +use grafo::{Color, Renderer, RendererCreationError, Shape, Stroke, TransformInstance}; + +const CANVAS_WIDTH: u32 = 1024; +const CANVAS_HEIGHT: u32 = 1024; +const SCALE_FACTOR: f64 = 1.0; + +const ROOT_CACHE_KEY: u64 = 1; +const LEAF_CACHE_KEY: u64 = 2; + +const TOTAL_LEAF_COUNT: usize = 1024; +const MUTABLE_LEAF_COUNT: usize = TOTAL_LEAF_COUNT / 2; +const LEAF_COLUMNS_PER_HALF: usize = 16; + +const ROOT_PADDING: f32 = 24.0; +const LEAF_SIZE: f32 = 24.0; +const LEAF_SPACING: f32 = 28.0; +const MUTABLE_VARIANT_OFFSET: f32 = 6.0; + +struct SceneBuildResult { + root_node_id: usize, + mutable_leaf_node_ids: Vec, +} + +struct FullRebuildBenchmarkState { + renderer: Renderer<'static>, + pixel_buffer: Vec, + mutable_variant: bool, +} + +impl FullRebuildBenchmarkState { + fn new() -> Option { + let mut renderer = try_create_headless_renderer()?; + load_benchmark_shapes(&mut renderer); + + Some(Self { + renderer, + pixel_buffer: Vec::new(), + mutable_variant: false, + }) + } + + fn render_next_frame(&mut self) { + self.renderer.clear_draw_queue(); + build_benchmark_scene(&mut self.renderer, self.mutable_variant); + self.renderer.render_to_buffer(&mut self.pixel_buffer); + self.mutable_variant = !self.mutable_variant; + } +} + +struct PartialUpdateBenchmarkState { + renderer: Renderer<'static>, + pixel_buffer: Vec, + root_node_id: usize, + mutable_leaf_node_ids: Vec, + mutable_variant: bool, +} + +impl PartialUpdateBenchmarkState { + fn new() -> Option { + let mut renderer = try_create_headless_renderer()?; + load_benchmark_shapes(&mut renderer); + let scene = build_benchmark_scene(&mut renderer, false); + + Some(Self { + renderer, + pixel_buffer: Vec::new(), + root_node_id: scene.root_node_id, + mutable_leaf_node_ids: scene.mutable_leaf_node_ids, + mutable_variant: false, + }) + } + + fn render_next_frame(&mut self) { + for node_id in self.mutable_leaf_node_ids.drain(..) { + self.renderer.remove_subtree(node_id); + } + + let next_mutable_variant = !self.mutable_variant; + self.mutable_leaf_node_ids = + add_mutable_leaf_nodes(&mut self.renderer, self.root_node_id, next_mutable_variant); + self.mutable_variant = next_mutable_variant; + + self.renderer.render_to_buffer(&mut self.pixel_buffer); + } +} + +fn try_create_headless_renderer() -> Option> { + match block_on(Renderer::try_new_headless( + (CANVAS_WIDTH, CANVAS_HEIGHT), + SCALE_FACTOR, + )) { + Ok(renderer) => Some(renderer), + Err(RendererCreationError::AdapterNotAvailable(_)) => None, + Err(error) => panic!("Failed to create headless renderer for benchmark: {error}"), + } +} + +fn load_benchmark_shapes(renderer: &mut Renderer<'static>) { + let root_shape = Shape::rect( + [(0.0, 0.0), (CANVAS_WIDTH as f32, CANVAS_HEIGHT as f32)], + Stroke::default(), + ); + renderer.load_shape(root_shape, ROOT_CACHE_KEY, Some(ROOT_CACHE_KEY)); + + let leaf_shape = Shape::rect([(0.0, 0.0), (LEAF_SIZE, LEAF_SIZE)], Stroke::default()); + renderer.load_shape(leaf_shape, LEAF_CACHE_KEY, Some(LEAF_CACHE_KEY)); +} + +fn build_benchmark_scene( + renderer: &mut Renderer<'static>, + mutable_variant: bool, +) -> SceneBuildResult { + let root_node_id = renderer.add_cached_shape_to_the_render_queue(ROOT_CACHE_KEY, None); + renderer.set_shape_color(root_node_id, Some(Color::TRANSPARENT)); + + add_stable_leaf_nodes(renderer, root_node_id); + let mutable_leaf_node_ids = add_mutable_leaf_nodes(renderer, root_node_id, mutable_variant); + + SceneBuildResult { + root_node_id, + mutable_leaf_node_ids, + } +} + +fn add_stable_leaf_nodes(renderer: &mut Renderer<'static>, root_node_id: usize) { + for stable_leaf_index in 0..MUTABLE_LEAF_COUNT { + let leaf_node_id = + renderer.add_cached_shape_to_the_render_queue(LEAF_CACHE_KEY, Some(root_node_id)); + renderer.set_shape_color(leaf_node_id, Some(color_for_stable_leaf(stable_leaf_index))); + renderer.set_shape_transform(leaf_node_id, stable_leaf_transform(stable_leaf_index)); + } +} + +fn add_mutable_leaf_nodes( + renderer: &mut Renderer<'static>, + root_node_id: usize, + mutable_variant: bool, +) -> Vec { + let mut mutable_leaf_node_ids = Vec::with_capacity(MUTABLE_LEAF_COUNT); + + for mutable_leaf_index in 0..MUTABLE_LEAF_COUNT { + let leaf_node_id = + renderer.add_cached_shape_to_the_render_queue(LEAF_CACHE_KEY, Some(root_node_id)); + renderer.set_shape_color( + leaf_node_id, + Some(color_for_mutable_leaf(mutable_leaf_index, mutable_variant)), + ); + renderer.set_shape_transform( + leaf_node_id, + mutable_leaf_transform(mutable_leaf_index, mutable_variant), + ); + mutable_leaf_node_ids.push(leaf_node_id); + } + + mutable_leaf_node_ids +} + +fn stable_leaf_transform(stable_leaf_index: usize) -> TransformInstance { + let row = stable_leaf_index / LEAF_COLUMNS_PER_HALF; + let column = stable_leaf_index % LEAF_COLUMNS_PER_HALF; + + TransformInstance::translation( + ROOT_PADDING + column as f32 * LEAF_SPACING, + ROOT_PADDING + row as f32 * LEAF_SPACING, + ) +} + +fn mutable_leaf_transform(mutable_leaf_index: usize, mutable_variant: bool) -> TransformInstance { + let row = mutable_leaf_index / LEAF_COLUMNS_PER_HALF; + let column = mutable_leaf_index % LEAF_COLUMNS_PER_HALF; + let variant_offset = if mutable_variant { + MUTABLE_VARIANT_OFFSET + } else { + 0.0 + }; + + TransformInstance::translation( + ROOT_PADDING + (column + LEAF_COLUMNS_PER_HALF) as f32 * LEAF_SPACING + variant_offset, + ROOT_PADDING + row as f32 * LEAF_SPACING + variant_offset, + ) +} + +fn color_for_stable_leaf(stable_leaf_index: usize) -> Color { + let intensity = 80 + (stable_leaf_index % 5) as u8 * 20; + Color::rgb(intensity, 140, 220) +} + +fn color_for_mutable_leaf(mutable_leaf_index: usize, mutable_variant: bool) -> Color { + let base_intensity = 70 + (mutable_leaf_index % 5) as u8 * 25; + if mutable_variant { + Color::rgb(220, base_intensity, 110) + } else { + Color::rgb(110, base_intensity, 220) + } +} + +fn benchmark_renderer_partial_updates(criterion: &mut Criterion) { + let Some(mut full_rebuild_state) = FullRebuildBenchmarkState::new() else { + eprintln!( + "Skipping renderer_partial_updates benchmark: no suitable GPU adapter available." + ); + return; + }; + let Some(mut partial_update_state) = PartialUpdateBenchmarkState::new() else { + eprintln!( + "Skipping renderer_partial_updates benchmark: no suitable GPU adapter available." + ); + return; + }; + + let mut benchmark_group = criterion.benchmark_group("renderer_full_frame"); + benchmark_group.measurement_time(Duration::from_secs(10)); + benchmark_group.sample_size(10); + + benchmark_group.bench_function("clear_tree_and_rebuild", |benchmark| { + benchmark.iter(|| { + full_rebuild_state.render_next_frame(); + black_box(full_rebuild_state.pixel_buffer.first().copied()); + }); + }); + + benchmark_group.bench_function("remove_half_and_repopulate", |benchmark| { + benchmark.iter(|| { + partial_update_state.render_next_frame(); + black_box(partial_update_state.pixel_buffer.first().copied()); + }); + }); + + benchmark_group.finish(); +} + +criterion_group!(benches, benchmark_renderer_partial_updates); +criterion_main!(benches); diff --git a/src/renderer/draw_queue.rs b/src/renderer/draw_queue.rs index 6872041..33999ab 100644 --- a/src/renderer/draw_queue.rs +++ b/src/renderer/draw_queue.rs @@ -1,5 +1,6 @@ use super::*; use crate::gradient::types::Fill; +use ahash::HashSet; impl<'a> Renderer<'a> { pub fn add_shape( @@ -63,6 +64,23 @@ impl<'a> Renderer<'a> { self.trim_scratch_on_resize_or_policy(); } + pub fn remove_subtree(&mut self, node_id: usize) { + if self.draw_tree.get(node_id).is_none() { + return; + } + + let parent_node_id = self.draw_tree.parent_index_unchecked(node_id); + let removed_node_ids = collect_subtree_node_ids(&self.draw_tree, node_id); + let removed_node_id_set: HashSet = removed_node_ids.iter().copied().collect(); + + self.draw_tree.remove_subtree(node_id); + self.remove_metadata_and_effects_for_removed_nodes(&removed_node_id_set); + + if let Some(parent_node_id) = parent_node_id { + self.sync_leaf_state(parent_node_id); + } + } + fn add_draw_command( &mut self, draw_command: DrawCommand, @@ -85,6 +103,30 @@ impl<'a> Renderer<'a> { } } + fn remove_metadata_and_effects_for_removed_nodes( + &mut self, + removed_node_id_set: &HashSet, + ) { + self.metadata_to_clips.retain(|node_id, clip_node_id| { + !removed_node_id_set.contains(node_id) && !removed_node_id_set.contains(clip_node_id) + }); + self.group_effects + .retain(|node_id, _| !removed_node_id_set.contains(node_id)); + self.backdrop_effects + .retain(|node_id, _| !removed_node_id_set.contains(node_id)); + } + + fn sync_leaf_state(&mut self, node_id: usize) { + if self.draw_tree.get(node_id).is_none() { + return; + } + + let node_has_children = !self.draw_tree.children(node_id).is_empty(); + if let Some(draw_command) = self.draw_tree.get_mut(node_id) { + draw_command.set_leaf_state(!node_has_children); + } + } + pub fn set_shape_transform_cols(&mut self, node_id: usize, cols: [[f32; 4]; 4]) { let transform = InstanceTransform { col0: cols[0], @@ -176,3 +218,26 @@ impl<'a> Renderer<'a> { ); } } + +fn collect_subtree_node_ids( + draw_tree: &easy_tree::Tree, + root_node_id: usize, +) -> Vec { + if draw_tree.get(root_node_id).is_none() { + return Vec::new(); + } + + let mut subtree_node_ids = Vec::new(); + let mut node_stack = vec![root_node_id]; + + while let Some(node_id) = node_stack.pop() { + if draw_tree.get(node_id).is_none() { + continue; + } + + subtree_node_ids.push(node_id); + node_stack.extend(draw_tree.children(node_id).iter().copied()); + } + + subtree_node_ids +} diff --git a/src/renderer/types.rs b/src/renderer/types.rs index e1cb121..6caeb32 100644 --- a/src/renderer/types.rs +++ b/src/renderer/types.rs @@ -33,6 +33,13 @@ impl DrawCommand { DrawCommand::CachedShape(s) => s.is_leaf = false, } } + + pub(super) fn set_leaf_state(&mut self, is_leaf: bool) { + match self { + DrawCommand::Shape(shape) => shape.is_leaf = is_leaf, + DrawCommand::CachedShape(cached_shape) => cached_shape.is_leaf = is_leaf, + } + } } impl DrawCommand { From f3c18444577994b1a77e315c53ca9b211eb287a2 Mon Sep 17 00:00:00 2001 From: Anton Suprunchuk Date: Fri, 10 Apr 2026 12:01:22 -0300 Subject: [PATCH 2/2] bla --- benches/renderer_partial_updates.rs | 342 +++++++++++++- src/renderer.rs | 17 +- src/renderer/construction.rs | 60 +-- src/renderer/draw_queue.rs | 224 ++++++--- src/renderer/passes.rs | 187 ++++++-- src/renderer/preparation.rs | 696 ++++++++++++++++++++++------ src/renderer/readback.rs | 31 ++ src/renderer/rect_utils.rs | 189 ++++++-- src/renderer/rendering.rs | 5 + src/renderer/types.rs | 69 ++- src/shaders/shader.wgsl | 104 +++-- src/shape.rs | 11 - src/vertex.rs | 17 + 13 files changed, 1557 insertions(+), 395 deletions(-) diff --git a/benches/renderer_partial_updates.rs b/benches/renderer_partial_updates.rs index 99ea32d..ee930d0 100644 --- a/benches/renderer_partial_updates.rs +++ b/benches/renderer_partial_updates.rs @@ -14,20 +14,24 @@ const LEAF_CACHE_KEY: u64 = 2; const TOTAL_LEAF_COUNT: usize = 1024; const MUTABLE_LEAF_COUNT: usize = TOTAL_LEAF_COUNT / 2; const LEAF_COLUMNS_PER_HALF: usize = 16; +const SHALLOW_SUBTREE_COUNT: usize = 100; +const SHALLOW_SUBTREE_COLUMNS: usize = 10; +const SHALLOW_SUBTREE_CHILDREN_PER_ROW: usize = 4; const ROOT_PADDING: f32 = 24.0; const LEAF_SIZE: f32 = 24.0; const LEAF_SPACING: f32 = 28.0; const MUTABLE_VARIANT_OFFSET: f32 = 6.0; +const SHALLOW_SUBTREE_SPACING_X: f32 = 72.0; +const SHALLOW_SUBTREE_SPACING_Y: f32 = 72.0; struct SceneBuildResult { root_node_id: usize, - mutable_leaf_node_ids: Vec, + mutable_subtree_root_id: usize, } struct FullRebuildBenchmarkState { renderer: Renderer<'static>, - pixel_buffer: Vec, mutable_variant: bool, } @@ -38,7 +42,6 @@ impl FullRebuildBenchmarkState { Some(Self { renderer, - pixel_buffer: Vec::new(), mutable_variant: false, }) } @@ -46,16 +49,21 @@ impl FullRebuildBenchmarkState { fn render_next_frame(&mut self) { self.renderer.clear_draw_queue(); build_benchmark_scene(&mut self.renderer, self.mutable_variant); - self.renderer.render_to_buffer(&mut self.pixel_buffer); + self.renderer.render_headless_frame(); + self.mutable_variant = !self.mutable_variant; + } + + fn mutate_next_frame(&mut self) { + self.renderer.clear_draw_queue(); + build_benchmark_scene(&mut self.renderer, self.mutable_variant); self.mutable_variant = !self.mutable_variant; } } struct PartialUpdateBenchmarkState { renderer: Renderer<'static>, - pixel_buffer: Vec, root_node_id: usize, - mutable_leaf_node_ids: Vec, + mutable_subtree_root_id: usize, mutable_variant: bool, } @@ -67,24 +75,109 @@ impl PartialUpdateBenchmarkState { Some(Self { renderer, - pixel_buffer: Vec::new(), root_node_id: scene.root_node_id, - mutable_leaf_node_ids: scene.mutable_leaf_node_ids, + mutable_subtree_root_id: scene.mutable_subtree_root_id, + mutable_variant: false, + }) + } + + fn render_next_frame(&mut self) { + self.renderer.remove_subtree(self.mutable_subtree_root_id); + + let next_mutable_variant = !self.mutable_variant; + self.mutable_subtree_root_id = + add_mutable_leaf_subtree(&mut self.renderer, self.root_node_id, next_mutable_variant); + self.mutable_variant = next_mutable_variant; + + self.renderer.render_headless_frame(); + } + + fn mutate_next_frame(&mut self) { + self.renderer.remove_subtree(self.mutable_subtree_root_id); + + let next_mutable_variant = !self.mutable_variant; + self.mutable_subtree_root_id = + add_mutable_leaf_subtree(&mut self.renderer, self.root_node_id, next_mutable_variant); + self.mutable_variant = next_mutable_variant; + } +} + +struct ManyShallowPartialUpdateBenchmarkState { + renderer: Renderer<'static>, + root_node_id: usize, + mutable_subtree_root_ids: Vec, + replacement_subtree_root_ids: Vec, + mutable_variant: bool, +} + +impl ManyShallowPartialUpdateBenchmarkState { + fn new() -> Option { + let mut renderer = try_create_headless_renderer()?; + load_benchmark_shapes(&mut renderer); + + let root_node_id = renderer.add_cached_shape_to_the_render_queue(ROOT_CACHE_KEY, None); + renderer.set_shape_color(root_node_id, Some(Color::TRANSPARENT)); + add_stable_leaf_nodes(&mut renderer, root_node_id); + + let mut mutable_subtree_root_ids = Vec::with_capacity(SHALLOW_SUBTREE_COUNT); + add_many_shallow_mutable_subtrees( + &mut renderer, + root_node_id, + false, + &mut mutable_subtree_root_ids, + ); + + Some(Self { + renderer, + root_node_id, + mutable_subtree_root_ids, + replacement_subtree_root_ids: Vec::with_capacity(SHALLOW_SUBTREE_COUNT), mutable_variant: false, }) } fn render_next_frame(&mut self) { - for node_id in self.mutable_leaf_node_ids.drain(..) { - self.renderer.remove_subtree(node_id); + for &subtree_root_id in &self.mutable_subtree_root_ids { + self.renderer.remove_subtree(subtree_root_id); } let next_mutable_variant = !self.mutable_variant; - self.mutable_leaf_node_ids = - add_mutable_leaf_nodes(&mut self.renderer, self.root_node_id, next_mutable_variant); + self.replacement_subtree_root_ids.clear(); + add_many_shallow_mutable_subtrees( + &mut self.renderer, + self.root_node_id, + next_mutable_variant, + &mut self.replacement_subtree_root_ids, + ); + + std::mem::swap( + &mut self.mutable_subtree_root_ids, + &mut self.replacement_subtree_root_ids, + ); self.mutable_variant = next_mutable_variant; - self.renderer.render_to_buffer(&mut self.pixel_buffer); + self.renderer.render_headless_frame(); + } + + fn mutate_next_frame(&mut self) { + for &subtree_root_id in &self.mutable_subtree_root_ids { + self.renderer.remove_subtree(subtree_root_id); + } + + let next_mutable_variant = !self.mutable_variant; + self.replacement_subtree_root_ids.clear(); + add_many_shallow_mutable_subtrees( + &mut self.renderer, + self.root_node_id, + next_mutable_variant, + &mut self.replacement_subtree_root_ids, + ); + + std::mem::swap( + &mut self.mutable_subtree_root_ids, + &mut self.replacement_subtree_root_ids, + ); + self.mutable_variant = next_mutable_variant; } } @@ -118,11 +211,11 @@ fn build_benchmark_scene( renderer.set_shape_color(root_node_id, Some(Color::TRANSPARENT)); add_stable_leaf_nodes(renderer, root_node_id); - let mutable_leaf_node_ids = add_mutable_leaf_nodes(renderer, root_node_id, mutable_variant); + let mutable_subtree_root_id = add_mutable_leaf_subtree(renderer, root_node_id, mutable_variant); SceneBuildResult { root_node_id, - mutable_leaf_node_ids, + mutable_subtree_root_id, } } @@ -135,16 +228,18 @@ fn add_stable_leaf_nodes(renderer: &mut Renderer<'static>, root_node_id: usize) } } -fn add_mutable_leaf_nodes( +fn add_mutable_leaf_subtree( renderer: &mut Renderer<'static>, root_node_id: usize, mutable_variant: bool, -) -> Vec { - let mut mutable_leaf_node_ids = Vec::with_capacity(MUTABLE_LEAF_COUNT); +) -> usize { + let mutable_subtree_root_id = + renderer.add_cached_shape_to_the_render_queue(ROOT_CACHE_KEY, Some(root_node_id)); + renderer.set_shape_color(mutable_subtree_root_id, Some(Color::TRANSPARENT)); for mutable_leaf_index in 0..MUTABLE_LEAF_COUNT { - let leaf_node_id = - renderer.add_cached_shape_to_the_render_queue(LEAF_CACHE_KEY, Some(root_node_id)); + let leaf_node_id = renderer + .add_cached_shape_to_the_render_queue(LEAF_CACHE_KEY, Some(mutable_subtree_root_id)); renderer.set_shape_color( leaf_node_id, Some(color_for_mutable_leaf(mutable_leaf_index, mutable_variant)), @@ -153,10 +248,100 @@ fn add_mutable_leaf_nodes( leaf_node_id, mutable_leaf_transform(mutable_leaf_index, mutable_variant), ); - mutable_leaf_node_ids.push(leaf_node_id); } - mutable_leaf_node_ids + mutable_subtree_root_id +} + +fn add_many_shallow_mutable_subtrees( + renderer: &mut Renderer<'static>, + root_node_id: usize, + mutable_variant: bool, + subtree_root_ids: &mut Vec, +) { + subtree_root_ids.clear(); + + for subtree_index in 0..SHALLOW_SUBTREE_COUNT { + let subtree_root_id = + renderer.add_cached_shape_to_the_render_queue(ROOT_CACHE_KEY, Some(root_node_id)); + renderer.set_shape_color(subtree_root_id, Some(Color::TRANSPARENT)); + renderer.set_shape_transform( + subtree_root_id, + shallow_subtree_root_transform(subtree_index, mutable_variant), + ); + + let first_leaf_index = shallow_subtree_leaf_start(subtree_index); + let leaf_count = shallow_subtree_leaf_count(subtree_index); + + for local_leaf_index in 0..leaf_count { + let mutable_leaf_index = first_leaf_index + local_leaf_index; + let leaf_node_id = renderer + .add_cached_shape_to_the_render_queue(LEAF_CACHE_KEY, Some(subtree_root_id)); + renderer.set_shape_color( + leaf_node_id, + Some(color_for_mutable_leaf(mutable_leaf_index, mutable_variant)), + ); + renderer.set_shape_transform( + leaf_node_id, + shallow_subtree_leaf_transform(local_leaf_index, mutable_variant), + ); + } + + subtree_root_ids.push(subtree_root_id); + } +} + +fn shallow_subtree_leaf_start(subtree_index: usize) -> usize { + let base_leaf_count = MUTABLE_LEAF_COUNT / SHALLOW_SUBTREE_COUNT; + let remainder_leaf_count = MUTABLE_LEAF_COUNT % SHALLOW_SUBTREE_COUNT; + + subtree_index * base_leaf_count + subtree_index.min(remainder_leaf_count) +} + +fn shallow_subtree_leaf_count(subtree_index: usize) -> usize { + let base_leaf_count = MUTABLE_LEAF_COUNT / SHALLOW_SUBTREE_COUNT; + let remainder_leaf_count = MUTABLE_LEAF_COUNT % SHALLOW_SUBTREE_COUNT; + + base_leaf_count + usize::from(subtree_index < remainder_leaf_count) +} + +fn shallow_subtree_root_transform( + subtree_index: usize, + mutable_variant: bool, +) -> TransformInstance { + let row = subtree_index / SHALLOW_SUBTREE_COLUMNS; + let column = subtree_index % SHALLOW_SUBTREE_COLUMNS; + let variant_offset = if mutable_variant { + MUTABLE_VARIANT_OFFSET + } else { + 0.0 + }; + + TransformInstance::translation( + ROOT_PADDING + column as f32 * SHALLOW_SUBTREE_SPACING_X + variant_offset, + ROOT_PADDING + + 20.0 * LEAF_SPACING + + row as f32 * SHALLOW_SUBTREE_SPACING_Y + + variant_offset, + ) +} + +fn shallow_subtree_leaf_transform( + local_leaf_index: usize, + mutable_variant: bool, +) -> TransformInstance { + let row = local_leaf_index / SHALLOW_SUBTREE_CHILDREN_PER_ROW; + let column = local_leaf_index % SHALLOW_SUBTREE_CHILDREN_PER_ROW; + let variant_offset = if mutable_variant { + MUTABLE_VARIANT_OFFSET + } else { + 0.0 + }; + + TransformInstance::translation( + column as f32 * LEAF_SPACING * 0.6 + variant_offset, + row as f32 * LEAF_SPACING * 0.6 + variant_offset, + ) } fn stable_leaf_transform(stable_leaf_index: usize) -> TransformInstance { @@ -211,6 +396,13 @@ fn benchmark_renderer_partial_updates(criterion: &mut Criterion) { ); return; }; + let Some(mut many_shallow_partial_update_state) = ManyShallowPartialUpdateBenchmarkState::new() + else { + eprintln!( + "Skipping renderer_partial_updates benchmark: no suitable GPU adapter available." + ); + return; + }; let mut benchmark_group = criterion.benchmark_group("renderer_full_frame"); benchmark_group.measurement_time(Duration::from_secs(10)); @@ -219,18 +411,120 @@ fn benchmark_renderer_partial_updates(criterion: &mut Criterion) { benchmark_group.bench_function("clear_tree_and_rebuild", |benchmark| { benchmark.iter(|| { full_rebuild_state.render_next_frame(); - black_box(full_rebuild_state.pixel_buffer.first().copied()); + black_box(full_rebuild_state.mutable_variant); }); }); benchmark_group.bench_function("remove_half_and_repopulate", |benchmark| { benchmark.iter(|| { partial_update_state.render_next_frame(); - black_box(partial_update_state.pixel_buffer.first().copied()); + black_box(partial_update_state.mutable_variant); + }); + }); + + benchmark_group.bench_function("remove_100_shallow_subtrees_and_repopulate", |benchmark| { + benchmark.iter(|| { + many_shallow_partial_update_state.render_next_frame(); + black_box(many_shallow_partial_update_state.mutable_variant); }); }); benchmark_group.finish(); + + let Some(mut full_rebuild_prepare_state) = FullRebuildBenchmarkState::new() else { + eprintln!("Skipping renderer_prepare_cpu benchmark: no suitable GPU adapter available."); + return; + }; + let Some(mut partial_update_prepare_state) = PartialUpdateBenchmarkState::new() else { + eprintln!("Skipping renderer_prepare_cpu benchmark: no suitable GPU adapter available."); + return; + }; + let Some(mut many_shallow_prepare_state) = ManyShallowPartialUpdateBenchmarkState::new() else { + eprintln!("Skipping renderer_prepare_cpu benchmark: no suitable GPU adapter available."); + return; + }; + + let mut prepare_group = criterion.benchmark_group("renderer_prepare_cpu"); + prepare_group.measurement_time(Duration::from_secs(10)); + prepare_group.sample_size(10); + + prepare_group.bench_function("clear_tree_and_rebuild", |benchmark| { + benchmark.iter_custom(|iterations| { + let mut total_prepare_time = Duration::ZERO; + for _ in 0..iterations { + full_rebuild_prepare_state.render_next_frame(); + total_prepare_time += full_rebuild_prepare_state.renderer.last_prepare_cpu_time(); + } + total_prepare_time + }); + }); + + prepare_group.bench_function("remove_half_and_repopulate", |benchmark| { + benchmark.iter_custom(|iterations| { + let mut total_prepare_time = Duration::ZERO; + for _ in 0..iterations { + partial_update_prepare_state.render_next_frame(); + total_prepare_time += partial_update_prepare_state + .renderer + .last_prepare_cpu_time(); + } + total_prepare_time + }); + }); + + prepare_group.bench_function("remove_100_shallow_subtrees_and_repopulate", |benchmark| { + benchmark.iter_custom(|iterations| { + let mut total_prepare_time = Duration::ZERO; + for _ in 0..iterations { + many_shallow_prepare_state.render_next_frame(); + total_prepare_time += many_shallow_prepare_state.renderer.last_prepare_cpu_time(); + } + total_prepare_time + }); + }); + + prepare_group.finish(); + + let Some(mut full_rebuild_mutation_state) = FullRebuildBenchmarkState::new() else { + eprintln!("Skipping renderer_mutation_cpu benchmark: no suitable GPU adapter available."); + return; + }; + let Some(mut partial_update_mutation_state) = PartialUpdateBenchmarkState::new() else { + eprintln!("Skipping renderer_mutation_cpu benchmark: no suitable GPU adapter available."); + return; + }; + let Some(mut many_shallow_mutation_state) = ManyShallowPartialUpdateBenchmarkState::new() + else { + eprintln!("Skipping renderer_mutation_cpu benchmark: no suitable GPU adapter available."); + return; + }; + + let mut mutation_group = criterion.benchmark_group("renderer_mutation_cpu"); + mutation_group.measurement_time(Duration::from_secs(10)); + mutation_group.sample_size(10); + + mutation_group.bench_function("clear_tree_and_rebuild", |benchmark| { + benchmark.iter(|| { + full_rebuild_mutation_state.mutate_next_frame(); + black_box(full_rebuild_mutation_state.mutable_variant); + }); + }); + + mutation_group.bench_function("remove_half_and_repopulate", |benchmark| { + benchmark.iter(|| { + partial_update_mutation_state.mutate_next_frame(); + black_box(partial_update_mutation_state.mutable_variant); + }); + }); + + mutation_group.bench_function("remove_100_shallow_subtrees_and_repopulate", |benchmark| { + benchmark.iter(|| { + many_shallow_mutation_state.mutate_next_frame(); + black_box(many_shallow_mutation_state.mutable_variant); + }); + }); + + mutation_group.finish(); } criterion_group!(benches, benchmark_renderer_partial_updates); diff --git a/src/renderer.rs b/src/renderer.rs index 1acaef2..c67bdcc 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -29,6 +29,7 @@ use crate::Color; #[cfg(feature = "render_metrics")] use self::metrics::RenderLoopMetricsTracker; +use self::prepared_scene::PreparedScene; use self::types::{DrawCommand, RendererScratch}; mod construction; @@ -38,6 +39,7 @@ mod effects; pub mod metrics; mod passes; mod preparation; +mod prepared_scene; mod readback; mod rect_utils; @@ -121,20 +123,7 @@ pub struct Renderer<'a> { /// Bind group for the decrementing pipeline. decrementing_bind_group: BindGroup, - temp_vertices: Vec, - temp_indices: Vec, - - /// Per-frame map from cache key to (index_start, index_count) in the - /// aggregated buffers, used to avoid duplicating vertex/index data for - /// cached shapes that share the same geometry. - geometry_dedup_map: HashMap, - - /// Per-frame instance transforms for shapes. - temp_instance_transforms: Vec, - /// Per-frame instance colors for shapes. - temp_instance_colors: Vec, - /// Per-frame instance metadata (draw order) for shapes. - temp_instance_metadata: Vec, + prepared_scene: PreparedScene, aggregated_vertex_buffer: Option, aggregated_index_buffer: Option, diff --git a/src/renderer/construction.rs b/src/renderer/construction.rs index e3595b1..38a3431 100644 --- a/src/renderer/construction.rs +++ b/src/renderer/construction.rs @@ -277,12 +277,7 @@ impl<'a> Renderer<'a> { decrementing_bind_group, draw_tree: easy_tree::Tree::new(), metadata_to_clips: HashMap::new(), - temp_vertices: Vec::new(), - temp_indices: Vec::new(), - geometry_dedup_map: HashMap::new(), - temp_instance_transforms: Vec::new(), - temp_instance_colors: Vec::new(), - temp_instance_metadata: Vec::new(), + prepared_scene: PreparedScene::new(), aggregated_vertex_buffer: None, aggregated_index_buffer: None, aggregated_instance_transform_buffer: None, @@ -357,36 +352,47 @@ impl<'a> Renderer<'a> { self.metadata_to_clips.len() ); - println!("\n--- Temporary Vectors ---"); + println!("\n--- Prepared Scene ---"); println!( - "Temp vertices: {} items, {} capacity, ~{} bytes", - self.temp_vertices.len(), - self.temp_vertices.capacity(), - self.temp_vertices.capacity() * std::mem::size_of::() + "Geometry vertices: {} items, {} capacity, ~{} bytes", + self.prepared_scene.geometry_vertices_len(), + self.prepared_scene.geometry_vertices.capacity(), + self.prepared_scene.geometry_vertices.capacity() + * std::mem::size_of::() ); println!( - "Temp indices: {} items, {} capacity, ~{} bytes", - self.temp_indices.len(), - self.temp_indices.capacity(), - self.temp_indices.capacity() * std::mem::size_of::() + "Geometry indices: {} items, {} capacity, ~{} bytes", + self.prepared_scene.geometry_indices_len(), + self.prepared_scene.geometry_indices.capacity(), + self.prepared_scene.geometry_indices.capacity() * std::mem::size_of::() ); println!( - "Temp instance transforms: {} items, {} capacity, ~{} bytes", - self.temp_instance_transforms.len(), - self.temp_instance_transforms.capacity(), - self.temp_instance_transforms.capacity() * std::mem::size_of::() + "Instance transforms: {} items, {} capacity, ~{} bytes", + self.prepared_scene.instance_transforms.len(), + self.prepared_scene.instance_transforms.capacity(), + self.prepared_scene.instance_transforms.capacity() + * std::mem::size_of::() ); println!( - "Temp instance colors: {} items, {} capacity, ~{} bytes", - self.temp_instance_colors.len(), - self.temp_instance_colors.capacity(), - self.temp_instance_colors.capacity() * std::mem::size_of::() + "Instance colors: {} items, {} capacity, ~{} bytes", + self.prepared_scene.instance_colors.len(), + self.prepared_scene.instance_colors.capacity(), + self.prepared_scene.instance_colors.capacity() * std::mem::size_of::() ); println!( - "Temp instance metadata: {} items, {} capacity, ~{} bytes", - self.temp_instance_metadata.len(), - self.temp_instance_metadata.capacity(), - self.temp_instance_metadata.capacity() * std::mem::size_of::() + "Instance metadata: {} items, {} capacity, ~{} bytes", + self.prepared_scene.instance_metadata.len(), + self.prepared_scene.instance_metadata.capacity(), + self.prepared_scene.instance_metadata.capacity() + * std::mem::size_of::() + ); + println!( + "Prepared geometry entries: {}", + self.prepared_scene.geometry_entries.len() + ); + println!( + "Prepared node bindings: {}", + self.prepared_scene.node_geometry_keys.len() ); println!("\n--- GPU Buffers ---"); diff --git a/src/renderer/draw_queue.rs b/src/renderer/draw_queue.rs index 33999ab..c635e73 100644 --- a/src/renderer/draw_queue.rs +++ b/src/renderer/draw_queue.rs @@ -1,8 +1,15 @@ +use super::prepared_scene::PreparedGeometryKey; use super::*; use crate::gradient::types::Fill; -use ahash::HashSet; impl<'a> Renderer<'a> { + fn should_mark_existing_instance_dirty( + prepared_scene: &super::prepared_scene::PreparedScene, + draw_command: &DrawCommand, + ) -> bool { + !prepared_scene.topology_dirty || draw_command.instance_index().is_some() + } + pub fn add_shape( &mut self, shape: impl Into, @@ -28,6 +35,28 @@ impl<'a> Renderer<'a> { tessellation_cache_key, ); self.shape_cache.insert(cache_key, cached_shape); + self.prepared_scene + .mark_geometry_key_dirty(PreparedGeometryKey::CachedShape(cache_key)); + + let cached_shape = self.shape_cache.get(&cache_key); + for (_node_id, draw_command) in self.draw_tree.iter_mut() { + let DrawCommand::CachedShape(cached_shape_draw_data) = draw_command else { + continue; + }; + + if cached_shape_draw_data.id == cache_key { + match cached_shape { + Some(cached_shape) => { + cached_shape_draw_data.is_rect = cached_shape.is_rect; + cached_shape_draw_data.rect_bounds = cached_shape.rect_bounds; + } + None => { + cached_shape_draw_data.is_rect = false; + cached_shape_draw_data.rect_bounds = None; + } + } + } + } } pub fn add_cached_shape_to_the_render_queue( @@ -60,6 +89,7 @@ impl<'a> Renderer<'a> { self.metadata_to_clips.clear(); self.group_effects.clear(); self.backdrop_effects.clear(); + self.prepared_scene.clear(); // Keep scratch storage bounded even if queue contents fluctuate frame-to-frame. self.trim_scratch_on_resize_or_policy(); } @@ -70,11 +100,43 @@ impl<'a> Renderer<'a> { } let parent_node_id = self.draw_tree.parent_index_unchecked(node_id); - let removed_node_ids = collect_subtree_node_ids(&self.draw_tree, node_id); - let removed_node_id_set: HashSet = removed_node_ids.iter().copied().collect(); + if self.draw_tree.children(node_id).is_empty() { + self.draw_tree.remove_subtree(node_id); + self.metadata_to_clips.remove(&node_id); + self.group_effects.remove(&node_id); + self.backdrop_effects.remove(&node_id); + self.prepared_scene.remove_node(node_id); + + if let Some(parent_node_id) = parent_node_id { + self.sync_leaf_state(parent_node_id); + } + return; + } + + let mut removed_node_ids = + std::mem::take(&mut self.prepared_scene.removed_node_ids_scratch); + let mut removed_node_id_set = + std::mem::take(&mut self.prepared_scene.removed_node_id_set_scratch); + let mut traversal_stack = std::mem::take(&mut self.prepared_scene.traversal_stack_scratch); + collect_subtree_node_ids_into( + &self.draw_tree, + node_id, + &mut removed_node_ids, + &mut traversal_stack, + ); + removed_node_id_set.clear(); + removed_node_id_set.extend(removed_node_ids.iter().copied()); self.draw_tree.remove_subtree(node_id); self.remove_metadata_and_effects_for_removed_nodes(&removed_node_id_set); + self.prepared_scene.remove_nodes(&removed_node_ids); + + removed_node_ids.clear(); + removed_node_id_set.clear(); + traversal_stack.clear(); + self.prepared_scene.removed_node_ids_scratch = removed_node_ids; + self.prepared_scene.removed_node_id_set_scratch = removed_node_id_set; + self.prepared_scene.traversal_stack_scratch = traversal_stack; if let Some(parent_node_id) = parent_node_id { self.sync_leaf_state(parent_node_id); @@ -86,7 +148,7 @@ impl<'a> Renderer<'a> { draw_command: DrawCommand, clip_to_shape: Option, ) -> usize { - if self.draw_tree.is_empty() { + let node_id = if self.draw_tree.is_empty() { self.draw_tree.add_node(draw_command) } else if let Some(clip_to_shape) = clip_to_shape { // Mark the parent as non-leaf since it now has a child. @@ -100,12 +162,16 @@ impl<'a> Renderer<'a> { root.set_not_leaf(); } self.draw_tree.add_child_to_root(draw_command) - } + }; + + self.prepared_scene.mark_topology_changed(); + self.prepared_scene.mark_node_instance_dirty(node_id); + node_id } fn remove_metadata_and_effects_for_removed_nodes( &mut self, - removed_node_id_set: &HashSet, + removed_node_id_set: &ahash::HashSet, ) { self.metadata_to_clips.retain(|node_id, clip_node_id| { !removed_node_id_set.contains(node_id) && !removed_node_id_set.contains(clip_node_id) @@ -135,18 +201,36 @@ impl<'a> Renderer<'a> { col3: cols[3], }; - let Some(draw_command) = self.draw_tree.get_mut(node_id) else { - return; + let should_mark_dirty = { + let Some(draw_command) = self.draw_tree.get_mut(node_id) else { + return; + }; + let should_mark_dirty = + Self::should_mark_existing_instance_dirty(&self.prepared_scene, draw_command); + draw_command.set_transform(transform); + should_mark_dirty }; - draw_command.set_transform(transform); + + if should_mark_dirty { + self.prepared_scene.mark_node_instance_dirty(node_id); + } } pub fn set_shape_transform(&mut self, node_id: usize, transform: impl Into) { let transform = transform.into(); - let Some(draw_command) = self.draw_tree.get_mut(node_id) else { - return; + let should_mark_dirty = { + let Some(draw_command) = self.draw_tree.get_mut(node_id) else { + return; + }; + let should_mark_dirty = + Self::should_mark_existing_instance_dirty(&self.prepared_scene, draw_command); + draw_command.set_transform(transform); + should_mark_dirty }; - draw_command.set_transform(transform); + + if should_mark_dirty { + self.prepared_scene.mark_node_instance_dirty(node_id); + } } pub fn set_shape_texture(&mut self, node_id: usize, texture_id: Option) { @@ -163,10 +247,19 @@ impl<'a> Renderer<'a> { return; } - let Some(draw_command) = self.draw_tree.get_mut(node_id) else { - return; + let should_mark_dirty = { + let Some(draw_command) = self.draw_tree.get_mut(node_id) else { + return; + }; + let should_mark_dirty = + Self::should_mark_existing_instance_dirty(&self.prepared_scene, draw_command); + draw_command.set_texture_id(layer, texture_id); + should_mark_dirty }; - draw_command.set_texture_id(layer, texture_id); + + if should_mark_dirty { + self.prepared_scene.mark_node_instance_dirty(node_id); + } } pub fn set_shape_texture_on( @@ -180,64 +273,85 @@ impl<'a> Renderer<'a> { pub fn set_shape_color(&mut self, node_id: usize, color: Option) { let normalized_color = color.map(|value| value.normalize()); - let Some(draw_command) = self.draw_tree.get_mut(node_id) else { - return; + let should_mark_dirty = { + let Some(draw_command) = self.draw_tree.get_mut(node_id) else { + return; + }; + let should_mark_dirty = + Self::should_mark_existing_instance_dirty(&self.prepared_scene, draw_command); + draw_command.set_instance_color_override(normalized_color); + // set_shape_color is sugar for Fill::Solid / None + let fill = color.map(Fill::Solid); + draw_command.set_fill(fill); + draw_command.refresh_gradient_bind_group( + &mut self.buffers_pool_manager.gradient_cache, + &self.device, + &self.queue, + &self.gradient_bind_group_layout, + &self.gradient_ramp_sampler, + self.gradient_bind_group_layout_epoch, + ); + should_mark_dirty }; - draw_command.set_instance_color_override(normalized_color); - // set_shape_color is sugar for Fill::Solid / None - let fill = color.map(Fill::Solid); - draw_command.set_fill(fill); - draw_command.refresh_gradient_bind_group( - &mut self.buffers_pool_manager.gradient_cache, - &self.device, - &self.queue, - &self.gradient_bind_group_layout, - &self.gradient_ramp_sampler, - self.gradient_bind_group_layout_epoch, - ); + + if should_mark_dirty { + self.prepared_scene.mark_node_instance_dirty(node_id); + } } pub fn set_shape_fill(&mut self, node_id: usize, fill: Option) { - let Some(draw_command) = self.draw_tree.get_mut(node_id) else { - return; - }; - // Derive color_override from fill for the solid fast path - let color_override = match &fill { - Some(Fill::Solid(color)) => Some(color.normalize()), - _ => None, + let should_mark_dirty = { + let Some(draw_command) = self.draw_tree.get_mut(node_id) else { + return; + }; + let should_mark_dirty = + Self::should_mark_existing_instance_dirty(&self.prepared_scene, draw_command); + // Derive color_override from fill for the solid fast path + let color_override = match &fill { + Some(Fill::Solid(color)) => Some(color.normalize()), + _ => None, + }; + draw_command.set_instance_color_override(color_override); + draw_command.set_fill(fill); + draw_command.refresh_gradient_bind_group( + &mut self.buffers_pool_manager.gradient_cache, + &self.device, + &self.queue, + &self.gradient_bind_group_layout, + &self.gradient_ramp_sampler, + self.gradient_bind_group_layout_epoch, + ); + should_mark_dirty }; - draw_command.set_instance_color_override(color_override); - draw_command.set_fill(fill); - draw_command.refresh_gradient_bind_group( - &mut self.buffers_pool_manager.gradient_cache, - &self.device, - &self.queue, - &self.gradient_bind_group_layout, - &self.gradient_ramp_sampler, - self.gradient_bind_group_layout_epoch, - ); + + if should_mark_dirty { + self.prepared_scene.mark_node_instance_dirty(node_id); + } } } -fn collect_subtree_node_ids( +fn collect_subtree_node_ids_into( draw_tree: &easy_tree::Tree, root_node_id: usize, -) -> Vec { + subtree_node_ids: &mut Vec, + traversal_stack: &mut Vec, +) { if draw_tree.get(root_node_id).is_none() { - return Vec::new(); + subtree_node_ids.clear(); + traversal_stack.clear(); + return; } - let mut subtree_node_ids = Vec::new(); - let mut node_stack = vec![root_node_id]; + subtree_node_ids.clear(); + traversal_stack.clear(); + traversal_stack.push(root_node_id); - while let Some(node_id) = node_stack.pop() { + while let Some(node_id) = traversal_stack.pop() { if draw_tree.get(node_id).is_none() { continue; } subtree_node_ids.push(node_id); - node_stack.extend(draw_tree.children(node_id).iter().copied()); + traversal_stack.extend(draw_tree.children(node_id).iter().copied()); } - - subtree_node_ids } diff --git a/src/renderer/passes.rs b/src/renderer/passes.rs index 8d257cc..56bb064 100644 --- a/src/renderer/passes.rs +++ b/src/renderer/passes.rs @@ -1,7 +1,8 @@ use super::types::{ClipKind, TraversalEvent}; use super::*; use crate::renderer::rect_utils::{ - intersect_scissor, should_skip_visible_rect_draw, try_scissor_for_rect, + intersect_scissor, should_skip_visible_rect_draw, should_use_discard_rect_clip, + try_logical_clip_rect_for_draw_command, try_scissor_for_rect, LogicalClipRect, }; /// Dispatch on `DrawCommand::Shape` / `DrawCommand::CachedShape`, binding the @@ -236,6 +237,46 @@ fn pipeline_has_shared_geometry_bindings(pipeline: crate::renderer::types::Pipel !matches!(pipeline, crate::renderer::types::Pipeline::None) } +fn push_logical_clip_rect( + logical_clip_stack: &mut Vec, + clip_rect: LogicalClipRect, +) { + let merged_clip_rect = logical_clip_stack + .last() + .copied() + .map(|current_clip_rect| current_clip_rect.intersect(clip_rect)) + .unwrap_or(clip_rect); + logical_clip_stack.push(merged_clip_rect); +} + +fn apply_fullscreen_clip_scissor( + render_pass: &mut wgpu::RenderPass<'_>, + scissor_stack: &[(u32, u32, u32, u32)], + logical_clip_stack: &[LogicalClipRect], + scale_factor: f64, + physical_size: (u32, u32), +) -> bool { + let viewport_scissor = (0u32, 0u32, physical_size.0, physical_size.1); + let mut scissor_rect = scissor_stack.last().copied().unwrap_or(viewport_scissor); + if let Some(current_clip_rect) = logical_clip_stack.last().copied() { + scissor_rect = intersect_scissor( + scissor_rect, + current_clip_rect.to_physical_scissor(scale_factor, physical_size), + ); + } + if scissor_rect == viewport_scissor { + return false; + } + + render_pass.set_scissor_rect( + scissor_rect.0, + scissor_rect.1, + scissor_rect.2, + scissor_rect.3, + ); + true +} + pub(super) fn handle_increment_pass<'rp>( render_pass: &mut wgpu::RenderPass<'rp>, currently_set_pipeline: &mut crate::renderer::types::PipelineTracker, @@ -619,6 +660,7 @@ pub(super) fn render_segments( backdrop_work_textures: &mut Vec, stencil_stack: &mut Vec, scissor_stack: &mut Vec<(u32, u32, u32, u32)>, + logical_clip_stack: &mut Vec, clip_kind_stack: &mut Vec, scale_factor: f64, physical_size: (u32, u32), @@ -635,6 +677,7 @@ pub(super) fn render_segments( stencil_stack.clear(); scissor_stack.clear(); scissor_stack.push(viewport_scissor); + logical_clip_stack.clear(); backdrop_work_textures.clear(); clip_kind_stack.clear(); @@ -716,7 +759,22 @@ pub(super) fn render_segments( render_pass.set_pipeline(pipeline); render_pass.set_bind_group(0, result_bind_group, &[]); render_pass.set_stencil_reference(parent_stencil); + let applied_clip_scissor = apply_fullscreen_clip_scissor( + &mut render_pass, + scissor_stack, + logical_clip_stack, + scale_factor, + physical_size, + ); render_pass.draw(0..3, 0..1); + if applied_clip_scissor { + render_pass.set_scissor_rect( + viewport_scissor.0, + viewport_scissor.1, + viewport_scissor.2, + viewport_scissor.3, + ); + } currently_set_pipeline.switch_to(types::Pipeline::None); bound_texture_state.invalidate(); } @@ -816,40 +874,88 @@ pub(super) fn render_segments( }); stencil_stack.push(parent_stencil); clip_kind_stack.push(ClipKind::NonClipping); - } else if let Some(scissor_rect) = - try_scissor_for_rect(draw_command, scale_factor, physical_size) + } else if let Some(logical_clip_rect) = + try_logical_clip_rect_for_draw_command(draw_command) { - // Scissor optimization: rect parent with axis-aligned - // transform. Use hardware scissor instead of stencil. - let current_scissor = - scissor_stack.last().copied().unwrap_or(viewport_scissor); - let clipped = intersect_scissor(current_scissor, scissor_rect); - scissor_stack.push(clipped); - render_pass - .set_scissor_rect(clipped.0, clipped.1, clipped.2, clipped.3); - #[cfg(feature = "render_metrics")] - currently_set_pipeline.record_scissor_clip(); - - // Draw the rect itself as a visible shape. - let parent_stencil = stencil_stack.last().copied().unwrap_or(0); - with_shape_mut!(draw_command, shape => { - *shape.stencil_ref_mut() = Some(parent_stencil); - if !should_skip_visible_draw { - handle_leaf_draw_pass( - &mut render_pass, - &mut currently_set_pipeline, - &mut bound_texture_state, - stencil_stack, - shape, - pipelines, - buffers, - ); - } - }); - // Push same stencil — children are clipped by scissor - // hardware, not by stencil buffer values. - stencil_stack.push(parent_stencil); - clip_kind_stack.push(ClipKind::Scissor); + if should_use_discard_rect_clip( + logical_clip_rect, + ( + width as f32 / scale_factor as f32, + height as f32 / scale_factor as f32, + ), + ) { + // Tight axis-aligned rect parent: descendants inherit a + // per-instance logical clip rect, so we avoid mutating + // render-pass scissor state for every subtree boundary. + let parent_stencil = stencil_stack.last().copied().unwrap_or(0); + with_shape_mut!(draw_command, shape => { + *shape.stencil_ref_mut() = Some(parent_stencil); + if !should_skip_visible_draw { + handle_leaf_draw_pass( + &mut render_pass, + &mut currently_set_pipeline, + &mut bound_texture_state, + stencil_stack, + shape, + pipelines, + buffers, + ); + } + }); + push_logical_clip_rect(logical_clip_stack, logical_clip_rect); + stencil_stack.push(parent_stencil); + clip_kind_stack.push(ClipKind::DiscardRectClip); + } else if let Some(scissor_rect) = + try_scissor_for_rect(draw_command, scale_factor, physical_size) + { + // Large rect clip: fixed-function scissor is still cheaper + // than per-fragment discard. + let current_scissor = + scissor_stack.last().copied().unwrap_or(viewport_scissor); + let clipped = intersect_scissor(current_scissor, scissor_rect); + scissor_stack.push(clipped); + render_pass.set_scissor_rect( + clipped.0, clipped.1, clipped.2, clipped.3, + ); + #[cfg(feature = "render_metrics")] + currently_set_pipeline.record_scissor_clip(); + + let parent_stencil = stencil_stack.last().copied().unwrap_or(0); + with_shape_mut!(draw_command, shape => { + *shape.stencil_ref_mut() = Some(parent_stencil); + if !should_skip_visible_draw { + handle_leaf_draw_pass( + &mut render_pass, + &mut currently_set_pipeline, + &mut bound_texture_state, + stencil_stack, + shape, + pipelines, + buffers, + ); + } + }); + stencil_stack.push(parent_stencil); + clip_kind_stack.push(ClipKind::Scissor); + } else { + let parent_stencil = stencil_stack.last().copied().unwrap_or(0); + with_shape_mut!(draw_command, shape => { + *shape.stencil_ref_mut() = Some(parent_stencil); + if !should_skip_visible_draw { + handle_leaf_draw_pass( + &mut render_pass, + &mut currently_set_pipeline, + &mut bound_texture_state, + stencil_stack, + shape, + pipelines, + buffers, + ); + } + }); + stencil_stack.push(parent_stencil); + clip_kind_stack.push(ClipKind::NonClipping); + } } else { // Fall back to stencil increment. with_shape_mut!(draw_command, shape => { @@ -900,6 +1006,18 @@ pub(super) fn render_segments( render_pass.set_scissor_rect(prev.0, prev.1, prev.2, prev.3); stencil_stack.pop(); } + Some(ClipKind::DiscardRectClip) => { + flush_pending_leaf_batch( + &mut pending_leaf_batch, + &mut render_pass, + &mut currently_set_pipeline, + &mut bound_texture_state, + pipelines, + buffers, + ); + logical_clip_stack.pop(); + stencil_stack.pop(); + } Some(ClipKind::Stencil) => { flush_pending_leaf_batch( &mut pending_leaf_batch, @@ -1029,7 +1147,6 @@ pub(super) fn render_segments( }, ); - // Restore scissor in the backdrop pass. let current_scissor = scissor_stack.last().copied().unwrap_or(viewport_scissor); if current_scissor != viewport_scissor { render_pass.set_scissor_rect( diff --git a/src/renderer/preparation.rs b/src/renderer/preparation.rs index 98c4ad9..59fb645 100644 --- a/src/renderer/preparation.rs +++ b/src/renderer/preparation.rs @@ -1,81 +1,253 @@ +use std::ops::Range; + +use bytemuck::Pod; +use bytemuck::Zeroable; +use tracing::warn; + +use super::prepared_scene::{PreparedGeometryKey, PreparedGeometryUpload}; use super::types::decide_buffer_sizing; use super::*; +use crate::renderer::rect_utils::{ + should_use_discard_rect_clip, try_logical_clip_rect_for_draw_command, LogicalClipRect, +}; -fn upsert_gpu_buffer( +fn create_gpu_buffer( + device: &wgpu::Device, + label: &'static str, + size: u64, + usage: wgpu::BufferUsages, +) -> wgpu::Buffer { + device.create_buffer(&wgpu::BufferDescriptor { + label: Some(label), + size: size.max(4), + usage, + mapped_at_creation: false, + }) +} + +fn ensure_gpu_buffer_capacity( device: &wgpu::Device, - queue: &wgpu::Queue, buffer: &mut Option, label: &'static str, - bytes: &[u8], usage: wgpu::BufferUsages, -) { - let decision = - decide_buffer_sizing(buffer.as_ref().map(|existing| existing.size()), bytes.len()); + required_size_in_bytes: usize, +) -> bool { + let decision = decide_buffer_sizing( + buffer + .as_ref() + .map(|existing_buffer| existing_buffer.size()), + required_size_in_bytes, + ); if decision.should_reallocate { - *buffer = Some(crate::pipeline::create_buffer_init( + *buffer = Some(create_gpu_buffer( device, - Some(label), - bytes, + label, + decision.target_size, usage, )); - } else if let Some(existing_buffer) = buffer.as_ref() { - queue.write_buffer(existing_buffer, 0, bytes); + true + } else { + false } } -fn append_aggregated_geometry( - temp_vertices: &mut Vec, - temp_indices: &mut Vec, - vertices: &[crate::vertex::CustomVertex], - indices: &[u16], -) -> Option<(usize, usize)> { - if vertices.is_empty() || indices.is_empty() { - return None; - } +fn write_buffer_ranges( + queue: &wgpu::Queue, + buffer: &wgpu::Buffer, + source_data: &[T], + dirty_ranges: &[Range], +) { + for dirty_range in dirty_ranges { + if dirty_range.is_empty() { + continue; + } - let vertex_start = temp_vertices.len(); - if vertex_start > u16::MAX as usize { - warn!( - "Aggregated vertex count ({}) exceeds u16 limit. Rendering artifacts may occur.", - vertex_start + let byte_offset = (dirty_range.start * std::mem::size_of::()) as u64; + queue.write_buffer( + buffer, + byte_offset, + bytemuck::cast_slice(&source_data[dirty_range.clone()]), ); } +} + +fn write_full_buffer(queue: &wgpu::Queue, buffer: &wgpu::Buffer, source_data: &[T]) { + queue.write_buffer(buffer, 0, bytemuck::cast_slice(source_data)); +} + +fn coalesce_ranges(dirty_ranges: &mut Vec>) { + if dirty_ranges.len() <= 1 { + return; + } - let index_start = temp_indices.len(); - let vertex_offset = vertex_start as u16; - temp_vertices.extend_from_slice(vertices); + dirty_ranges.sort_unstable_by_key(|dirty_range| dirty_range.start); - for &index in indices { - temp_indices.push(index + vertex_offset); + let mut merged_ranges: Vec> = Vec::with_capacity(dirty_ranges.len()); + for dirty_range in dirty_ranges.drain(..) { + if let Some(last_merged_range) = merged_ranges.last_mut() { + if dirty_range.start <= last_merged_range.end { + last_merged_range.end = last_merged_range.end.max(dirty_range.end); + continue; + } + } + + merged_ranges.push(dirty_range); + } + + *dirty_ranges = merged_ranges; +} + +fn find_first_mismatched_slot(previous_order: &[usize], current_order: &[usize]) -> Option { + let shared_prefix_len = previous_order.len().min(current_order.len()); + for index in 0..shared_prefix_len { + if previous_order[index] != current_order[index] { + return Some(index); + } } - Some((index_start, indices.len())) + (previous_order.len() != current_order.len()).then_some(shared_prefix_len) } -fn append_instance_data( - temp_instance_transforms: &mut Vec, - temp_instance_colors: &mut Vec, - temp_instance_metadata: &mut Vec, - transform: Option, - color_override: Option<[f32; 4]>, - texture_ids: [Option; 2], -) -> usize { - let instance_index = temp_instance_transforms.len(); - temp_instance_transforms.push(transform.unwrap_or_else(InstanceTransform::identity)); - temp_instance_colors.push(InstanceColor { - color: color_override.unwrap_or([0.0, 0.0, 0.0, 0.0]), - }); - let texture_flags = - (texture_ids[0].is_some() as u32) | ((texture_ids[1].is_some() as u32) << 1); - temp_instance_metadata.push(InstanceMetadata { - draw_order: instance_index as f32, +fn min_dirty_slot(current_first_dirty_slot: Option, candidate_slot: usize) -> Option { + Some(match current_first_dirty_slot { + Some(existing_slot) => existing_slot.min(candidate_slot), + None => candidate_slot, + }) +} + +fn collect_depth_first_node_ids( + draw_tree: &easy_tree::Tree, + depth_first_nodes: &mut Vec, + traversal_stack: &mut Vec, +) { + depth_first_nodes.clear(); + traversal_stack.clear(); + if draw_tree.is_empty() { + return; + } + + traversal_stack.push(0); + while let Some(node_id) = traversal_stack.pop() { + depth_first_nodes.push(node_id); + for &child_node_id in draw_tree.children(node_id).iter().rev() { + traversal_stack.push(child_node_id); + } + } +} + +fn geometry_key_for_draw_command( + node_id: usize, + draw_command: &DrawCommand, +) -> PreparedGeometryKey { + match draw_command { + DrawCommand::Shape(shape) => match shape.cache_key { + Some(cache_key) => PreparedGeometryKey::SharedShape(cache_key), + None => PreparedGeometryKey::NodeLocal(node_id), + }, + DrawCommand::CachedShape(cached_shape) => PreparedGeometryKey::CachedShape(cached_shape.id), + } +} + +fn update_prepared_geometry_from_draw_command( + prepared_scene: &mut super::prepared_scene::PreparedScene, + geometry_key: PreparedGeometryKey, + draw_command: &mut DrawCommand, + shape_cache: &HashMap, + tessellator: &mut FillTessellator, + buffers_pool_manager: &mut PoolManager, +) -> PreparedGeometryUpload { + match draw_command { + DrawCommand::Shape(shape) => { + let tessellated_geometry = shape.tessellate(tessellator, buffers_pool_manager); + let geometry_upload = prepared_scene.update_geometry( + geometry_key, + tessellated_geometry.vertices(), + tessellated_geometry.indices(), + ); + + if let Some(owned_vertex_buffers) = tessellated_geometry.into_owned() { + buffers_pool_manager + .lyon_vertex_buffers_pool + .return_vertex_buffers(owned_vertex_buffers); + } + + geometry_upload + } + DrawCommand::CachedShape(cached_shape) => match shape_cache.get(&cached_shape.id) { + Some(loaded_cached_shape) => prepared_scene.update_geometry( + geometry_key, + &loaded_cached_shape.vertex_buffers.vertices, + &loaded_cached_shape.vertex_buffers.indices, + ), + None => { + warn!("Cached shape not found in cache"); + prepared_scene.update_geometry(geometry_key, &[], &[]) + } + }, + } +} + +fn build_instance_transform(draw_command: &DrawCommand) -> InstanceTransform { + draw_command + .transform() + .unwrap_or_else(InstanceTransform::identity) +} + +fn build_instance_color(draw_command: &DrawCommand) -> InstanceColor { + InstanceColor { + color: draw_command + .instance_color_override() + .unwrap_or([0.0, 0.0, 0.0, 0.0]), + } +} + +fn build_instance_metadata( + draw_command: &DrawCommand, + draw_order: usize, + inherited_clip_rect: Option, +) -> InstanceMetadata { + let texture_flags = (draw_command.texture_id(0).is_some() as u32) + | ((draw_command.texture_id(1).is_some() as u32) << 1); + + let (clip_rect_min, clip_rect_max) = inherited_clip_rect + .map(|clip_rect| { + ( + [clip_rect.min_x, clip_rect.min_y], + [clip_rect.max_x, clip_rect.max_y], + ) + }) + .unwrap_or(([1.0, 1.0], [0.0, 0.0])); + + InstanceMetadata { + draw_order: draw_order as f32, texture_flags: texture_flags as f32, - }); - instance_index + clip_rect_min, + clip_rect_max, + } } impl<'a> Renderer<'a> { + fn ensure_placeholder_geometry_buffers(&mut self) { + if self.aggregated_vertex_buffer.is_none() { + self.aggregated_vertex_buffer = Some(crate::pipeline::create_buffer_init( + &self.device, + Some("Placeholder Aggregated Vertex Buffer"), + bytemuck::cast_slice(&[crate::vertex::CustomVertex::zeroed()]), + BufferUsages::VERTEX | BufferUsages::COPY_DST, + )); + } + + if self.aggregated_index_buffer.is_none() { + self.aggregated_index_buffer = Some(crate::pipeline::create_buffer_init( + &self.device, + Some("Placeholder Aggregated Index Buffer"), + bytemuck::cast_slice(&[0u16]), + BufferUsages::INDEX | BufferUsages::COPY_DST, + )); + } + } + fn ensure_identity_instance_buffers(&mut self) { if self.identity_instance_transform_buffer.is_none() { let identity = InstanceTransform::identity(); @@ -111,148 +283,372 @@ impl<'a> Renderer<'a> { pub(super) fn prepare_render(&mut self) { let prepare_started_at = std::time::Instant::now(); - self.temp_vertices.clear(); - self.temp_indices.clear(); - self.temp_instance_transforms.clear(); - self.temp_instance_colors.clear(); - self.temp_instance_metadata.clear(); - self.geometry_dedup_map.clear(); - - for (_node_id, draw_command) in self.draw_tree.iter_mut() { - match draw_command { - DrawCommand::Shape(shape) => { - let tessellated_geometry = - shape.tessellate(&mut self.tessellator, &mut self.buffers_pool_manager); - - if let Some((index_start, index_count)) = append_aggregated_geometry( - &mut self.temp_vertices, - &mut self.temp_indices, - tessellated_geometry.vertices(), - tessellated_geometry.indices(), - ) { - shape.index_buffer_range = Some((index_start, index_count)); - let instance_index = append_instance_data( - &mut self.temp_instance_transforms, - &mut self.temp_instance_colors, - &mut self.temp_instance_metadata, - shape.transform(), - shape.instance_color_override(), - shape.texture_ids, - ); - *shape.instance_index_mut() = Some(instance_index); - shape.is_empty = false; - } else { - shape.is_empty = true; - } + self.ensure_identity_instance_buffers(); - if let Some(owned_vertex_buffers) = tessellated_geometry.into_owned() { - self.buffers_pool_manager - .lyon_vertex_buffers_pool - .return_vertex_buffers(owned_vertex_buffers); + if self.draw_tree.is_empty() { + self.prepared_scene.drawable_order.clear(); + self.prepared_scene.instance_transforms.clear(); + self.prepared_scene.instance_colors.clear(); + self.prepared_scene.instance_metadata.clear(); + self.prepared_scene.finish_prepare(); + self.last_prepare_cpu_time = prepare_started_at.elapsed(); + return; + } + + let mut depth_first_nodes = + std::mem::take(&mut self.prepared_scene.depth_first_nodes_scratch); + let mut traversal_stack = std::mem::take(&mut self.prepared_scene.traversal_stack_scratch); + collect_depth_first_node_ids( + &self.draw_tree, + &mut depth_first_nodes, + &mut traversal_stack, + ); + + let mut drawable_order = std::mem::take(&mut self.prepared_scene.drawable_order_scratch); + drawable_order.clear(); + let mut node_instance_clip_rects = + std::mem::take(&mut self.prepared_scene.node_instance_clip_rects_scratch); + let mut node_children_clip_rects = + std::mem::take(&mut self.prepared_scene.node_children_clip_rects_scratch); + + let mut dirty_vertex_ranges = + std::mem::take(&mut self.prepared_scene.geometry_vertex_upload_ranges); + dirty_vertex_ranges.clear(); + let mut dirty_index_ranges = + std::mem::take(&mut self.prepared_scene.geometry_index_upload_ranges); + dirty_index_ranges.clear(); + let mut requires_full_geometry_upload = false; + let canvas_logical_size = to_logical(self.physical_size, self.scale_factor); + + if let Some(max_node_id) = depth_first_nodes.iter().copied().max() { + if node_instance_clip_rects.len() <= max_node_id { + node_instance_clip_rects.resize(max_node_id + 1, None); + } + if node_children_clip_rects.len() <= max_node_id { + node_children_clip_rects.resize(max_node_id + 1, None); + } + + for &node_id in &depth_first_nodes { + node_instance_clip_rects[node_id] = None; + node_children_clip_rects[node_id] = None; + } + } + + { + let draw_tree = &mut self.draw_tree; + let prepared_scene = &mut self.prepared_scene; + let shape_cache = &self.shape_cache; + let tessellator = &mut self.tessellator; + let buffers_pool_manager = &mut self.buffers_pool_manager; + + for &node_id in &depth_first_nodes { + let inherited_clip_rect = + draw_tree + .parent_index_unchecked(node_id) + .and_then(|parent_node_id| { + node_children_clip_rects + .get(parent_node_id) + .copied() + .flatten() + }); + node_instance_clip_rects[node_id] = inherited_clip_rect; + + let draw_command = draw_tree + .get_mut(node_id) + .expect("depth-first node list must only contain live node ids"); + let children_clip_rect = if draw_command.clips_children() { + match try_logical_clip_rect_for_draw_command(draw_command) { + Some(node_clip_rect) + if should_use_discard_rect_clip( + node_clip_rect, + canvas_logical_size, + ) => + { + Some( + inherited_clip_rect + .map(|clip_rect| clip_rect.intersect(node_clip_rect)) + .unwrap_or(node_clip_rect), + ) + } + None => inherited_clip_rect, + Some(_) => inherited_clip_rect, } - } - DrawCommand::CachedShape(cached_shape_data) => { - // Geometry deduplication: if we already appended this cache - // key's vertices/indices, reuse the same range. - let index_range = if let Some(&existing_range) = - self.geometry_dedup_map.get(&cached_shape_data.id) - { - Some(existing_range) - } else if let Some(cached_shape) = - self.shape_cache.get_mut(&cached_shape_data.id) - { - let vertex_buffers = &cached_shape.vertex_buffers; - let range = append_aggregated_geometry( - &mut self.temp_vertices, - &mut self.temp_indices, - &vertex_buffers.vertices, - &vertex_buffers.indices, - ); - if let Some(range) = range { - self.geometry_dedup_map.insert(cached_shape_data.id, range); + } else { + inherited_clip_rect + }; + node_children_clip_rects[node_id] = children_clip_rect; + + let geometry_key = geometry_key_for_draw_command(node_id, draw_command); + + prepared_scene.ensure_node_geometry_key(node_id, geometry_key); + + if prepared_scene.dirty_geometry_keys.contains(&geometry_key) { + match update_prepared_geometry_from_draw_command( + prepared_scene, + geometry_key, + draw_command, + shape_cache, + tessellator, + buffers_pool_manager, + ) { + PreparedGeometryUpload::None => {} + PreparedGeometryUpload::Partial { + vertex_range, + index_range, + } => { + dirty_vertex_ranges.push(vertex_range); + dirty_index_ranges.push(index_range); + } + PreparedGeometryUpload::Full => { + requires_full_geometry_upload = true; } - range - } else { - warn!("Cached shape not found in cache"); - None - }; - - if let Some((index_start, index_count)) = index_range { - cached_shape_data.index_buffer_range = Some((index_start, index_count)); - cached_shape_data.is_empty = false; - let instance_index = append_instance_data( - &mut self.temp_instance_transforms, - &mut self.temp_instance_colors, - &mut self.temp_instance_metadata, - cached_shape_data.transform(), - cached_shape_data.instance_color_override(), - cached_shape_data.texture_ids, - ); - *cached_shape_data.instance_index_mut() = Some(instance_index); - } else { - cached_shape_data.is_empty = true; } } + + let geometry_entry = prepared_scene.geometry_entry(geometry_key); + let index_buffer_range = geometry_entry.and_then(|entry| entry.index_range()); + let is_empty = geometry_entry + .map(|entry| entry.vertices.is_empty() || entry.indices.is_empty()) + .unwrap_or(true); + + draw_command.set_prepared_geometry(index_buffer_range, is_empty); + if is_empty { + draw_command.set_instance_index(None); + } else { + drawable_order.push(node_id); + } } } - if !self.temp_vertices.is_empty() { - upsert_gpu_buffer( + let mut first_dirty_instance_slot = if self.prepared_scene.topology_dirty { + find_first_mismatched_slot(&self.prepared_scene.drawable_order, &drawable_order) + } else { + None + }; + + for (slot, &node_id) in drawable_order.iter().enumerate() { + let draw_command = self + .draw_tree + .get_mut(node_id) + .expect("drawable order must only contain live node ids"); + + if draw_command.instance_index() != Some(slot) { + first_dirty_instance_slot = min_dirty_slot(first_dirty_instance_slot, slot); + } + if self.prepared_scene.dirty_instance_nodes.contains(&node_id) { + first_dirty_instance_slot = min_dirty_slot(first_dirty_instance_slot, slot); + } + + draw_command.set_instance_index(Some(slot)); + } + + self.prepared_scene + .instance_transforms + .resize(drawable_order.len(), InstanceTransform::identity()); + self.prepared_scene + .instance_colors + .resize(drawable_order.len(), InstanceColor::transparent()); + self.prepared_scene + .instance_metadata + .resize(drawable_order.len(), InstanceMetadata::default()); + + if let Some(first_dirty_slot) = first_dirty_instance_slot { + for (slot, &node_id) in drawable_order.iter().enumerate().skip(first_dirty_slot) { + let draw_command = self + .draw_tree + .get(node_id) + .expect("drawable order must only contain live node ids"); + + self.prepared_scene.instance_transforms[slot] = + build_instance_transform(draw_command); + self.prepared_scene.instance_colors[slot] = build_instance_color(draw_command); + self.prepared_scene.instance_metadata[slot] = + build_instance_metadata(draw_command, slot, node_instance_clip_rects[node_id]); + } + } + + let live_vertex_count = self.prepared_scene.geometry_vertices_len(); + if live_vertex_count > 0 { + let geometry_vertices = &self.prepared_scene.geometry_vertices[..live_vertex_count]; + let reallocated = ensure_gpu_buffer_capacity( &self.device, - &self.queue, &mut self.aggregated_vertex_buffer, "Aggregated Vertex Buffer", - bytemuck::cast_slice(&self.temp_vertices), BufferUsages::VERTEX | BufferUsages::COPY_DST, + std::mem::size_of_val(geometry_vertices), ); + + let aggregated_vertex_buffer = self.aggregated_vertex_buffer.as_ref().unwrap(); + if requires_full_geometry_upload || reallocated { + write_full_buffer(&self.queue, aggregated_vertex_buffer, geometry_vertices); + } else if !dirty_vertex_ranges.is_empty() { + coalesce_ranges(&mut dirty_vertex_ranges); + write_buffer_ranges( + &self.queue, + aggregated_vertex_buffer, + geometry_vertices, + &dirty_vertex_ranges, + ); + } + } else { + self.ensure_placeholder_geometry_buffers(); } - if !self.temp_indices.is_empty() { - upsert_gpu_buffer( + let live_index_count = self.prepared_scene.geometry_indices_len(); + if live_index_count > 0 { + let geometry_indices = &self.prepared_scene.geometry_indices[..live_index_count]; + let reallocated = ensure_gpu_buffer_capacity( &self.device, - &self.queue, &mut self.aggregated_index_buffer, "Aggregated Index Buffer", - bytemuck::cast_slice(&self.temp_indices), BufferUsages::INDEX | BufferUsages::COPY_DST, + std::mem::size_of_val(geometry_indices), ); + + let aggregated_index_buffer = self.aggregated_index_buffer.as_ref().unwrap(); + if requires_full_geometry_upload || reallocated { + write_full_buffer(&self.queue, aggregated_index_buffer, geometry_indices); + } else if !dirty_index_ranges.is_empty() { + coalesce_ranges(&mut dirty_index_ranges); + write_buffer_ranges( + &self.queue, + aggregated_index_buffer, + geometry_indices, + &dirty_index_ranges, + ); + } + } else { + self.ensure_placeholder_geometry_buffers(); } - self.ensure_identity_instance_buffers(); + if !self.prepared_scene.instance_transforms.is_empty() { + let instance_update_range = first_dirty_instance_slot.map(|first_dirty_slot| { + first_dirty_slot..self.prepared_scene.instance_transforms.len() + }); - if !self.temp_instance_transforms.is_empty() { - upsert_gpu_buffer( + let reallocated = ensure_gpu_buffer_capacity( &self.device, - &self.queue, &mut self.aggregated_instance_transform_buffer, "Aggregated Instance Transform Buffer", - bytemuck::cast_slice(&self.temp_instance_transforms), BufferUsages::VERTEX | BufferUsages::COPY_DST, + std::mem::size_of_val(self.prepared_scene.instance_transforms.as_slice()), ); - } - if !self.temp_instance_colors.is_empty() { - upsert_gpu_buffer( + let aggregated_instance_transform_buffer = + self.aggregated_instance_transform_buffer.as_ref().unwrap(); + match instance_update_range.as_ref() { + Some(instance_update_range) if !reallocated => { + write_buffer_ranges( + &self.queue, + aggregated_instance_transform_buffer, + &self.prepared_scene.instance_transforms, + std::slice::from_ref(instance_update_range), + ); + } + _ => { + write_full_buffer( + &self.queue, + aggregated_instance_transform_buffer, + &self.prepared_scene.instance_transforms, + ); + } + } + + let reallocated = ensure_gpu_buffer_capacity( &self.device, - &self.queue, &mut self.aggregated_instance_color_buffer, "Aggregated Instance Color Buffer", - bytemuck::cast_slice(&self.temp_instance_colors), BufferUsages::VERTEX | BufferUsages::COPY_DST, + std::mem::size_of_val(self.prepared_scene.instance_colors.as_slice()), ); - } - if !self.temp_instance_metadata.is_empty() { - upsert_gpu_buffer( + let aggregated_instance_color_buffer = + self.aggregated_instance_color_buffer.as_ref().unwrap(); + match instance_update_range.as_ref() { + Some(instance_update_range) if !reallocated => { + write_buffer_ranges( + &self.queue, + aggregated_instance_color_buffer, + &self.prepared_scene.instance_colors, + std::slice::from_ref(instance_update_range), + ); + } + _ => { + write_full_buffer( + &self.queue, + aggregated_instance_color_buffer, + &self.prepared_scene.instance_colors, + ); + } + } + + let reallocated = ensure_gpu_buffer_capacity( &self.device, - &self.queue, &mut self.aggregated_instance_metadata_buffer, "Aggregated Instance Metadata Buffer", - bytemuck::cast_slice(&self.temp_instance_metadata), BufferUsages::VERTEX | BufferUsages::COPY_DST, + std::mem::size_of_val(self.prepared_scene.instance_metadata.as_slice()), ); + + let aggregated_instance_metadata_buffer = + self.aggregated_instance_metadata_buffer.as_ref().unwrap(); + match instance_update_range.as_ref() { + Some(instance_update_range) if !reallocated => { + write_buffer_ranges( + &self.queue, + aggregated_instance_metadata_buffer, + &self.prepared_scene.instance_metadata, + std::slice::from_ref(instance_update_range), + ); + } + _ => { + write_full_buffer( + &self.queue, + aggregated_instance_metadata_buffer, + &self.prepared_scene.instance_metadata, + ); + } + } } + std::mem::swap(&mut self.prepared_scene.drawable_order, &mut drawable_order); + drawable_order.clear(); + self.prepared_scene.drawable_order_scratch = drawable_order; + + depth_first_nodes.clear(); + self.prepared_scene.depth_first_nodes_scratch = depth_first_nodes; + traversal_stack.clear(); + self.prepared_scene.traversal_stack_scratch = traversal_stack; + self.prepared_scene.node_instance_clip_rects_scratch = node_instance_clip_rects; + self.prepared_scene.node_children_clip_rects_scratch = node_children_clip_rects; + + dirty_vertex_ranges.clear(); + self.prepared_scene.geometry_vertex_upload_ranges = dirty_vertex_ranges; + dirty_index_ranges.clear(); + self.prepared_scene.geometry_index_upload_ranges = dirty_index_ranges; + + self.prepared_scene.finish_prepare(); self.last_prepare_cpu_time = prepare_started_at.elapsed(); } } + +#[cfg(test)] +mod tests { + use super::{coalesce_ranges, find_first_mismatched_slot}; + + #[test] + fn coalesce_ranges_merges_overlapping_and_adjacent_ranges() { + let mut dirty_ranges = vec![6..8, 0..2, 2..4, 5..6]; + coalesce_ranges(&mut dirty_ranges); + + assert_eq!(dirty_ranges, vec![0..4, 5..8]); + } + + #[test] + fn find_first_mismatched_slot_detects_length_changes_after_shared_prefix() { + assert_eq!(find_first_mismatched_slot(&[1, 2, 3], &[1, 2]), Some(2)); + assert_eq!(find_first_mismatched_slot(&[1, 2], &[1, 2, 3]), Some(2)); + assert_eq!(find_first_mismatched_slot(&[1, 2, 3], &[1, 4, 3]), Some(1)); + assert_eq!(find_first_mismatched_slot(&[1, 2, 3], &[1, 2, 3]), None); + } +} diff --git a/src/renderer/readback.rs b/src/renderer/readback.rs index 5a444ab..b4697e7 100644 --- a/src/renderer/readback.rs +++ b/src/renderer/readback.rs @@ -25,6 +25,37 @@ fn copy_padded_readback_rows( } impl<'a> Renderer<'a> { + pub fn render_headless_frame(&mut self) { + self.prepare_render(); + + let (width, height) = self.physical_size; + let size_changed = self.rtb_cached_width != width || self.rtb_cached_height != height; + if size_changed { + self.rtb_cached_width = width; + self.rtb_cached_height = height; + } + + if size_changed || self.rtb_offscreen_texture.is_none() { + self.rtb_offscreen_texture = Some(create_offscreen_color_texture( + &self.device, + (width, height), + self.config.format, + )); + } + + let texture_view = self + .rtb_offscreen_texture + .as_ref() + .unwrap() + .create_view(&wgpu::TextureViewDescriptor::default()); + + let output_texture = self.rtb_offscreen_texture.take(); + self.render_to_texture_view(&texture_view, output_texture.as_ref()); + self.rtb_offscreen_texture = output_texture; + + let _ = self.device.poll(wgpu::MaintainBase::Wait); + } + fn map_readback_buffer_into( device: &wgpu::Device, buffer: &wgpu::Buffer, diff --git a/src/renderer/rect_utils.rs b/src/renderer/rect_utils.rs index 9b6ba4f..582d5fd 100644 --- a/src/renderer/rect_utils.rs +++ b/src/renderer/rect_utils.rs @@ -4,6 +4,66 @@ use super::types::DrawCommand; use crate::effect::EffectInstance; use crate::vertex::InstanceTransform; +#[derive(Clone, Copy, Debug, PartialEq)] +pub(super) struct LogicalClipRect { + pub(super) min_x: f32, + pub(super) min_y: f32, + pub(super) max_x: f32, + pub(super) max_y: f32, +} + +impl LogicalClipRect { + pub(super) fn intersect(self, other: Self) -> Self { + Self { + min_x: self.min_x.max(other.min_x), + min_y: self.min_y.max(other.min_y), + max_x: self.max_x.min(other.max_x), + max_y: self.max_y.min(other.max_y), + } + } + + pub(super) fn to_physical_scissor( + self, + scale_factor: f64, + physical_size: (u32, u32), + ) -> (u32, u32, u32, u32) { + let scale_factor = scale_factor as f32; + let px_min_x = ((self.min_x * scale_factor).floor().max(0.0) as u32).min(physical_size.0); + let px_min_y = ((self.min_y * scale_factor).floor().max(0.0) as u32).min(physical_size.1); + let px_max_x = (self.max_x * scale_factor) + .ceil() + .clamp(0.0, physical_size.0 as f32) as u32; + let px_max_y = (self.max_y * scale_factor) + .ceil() + .clamp(0.0, physical_size.1 as f32) as u32; + + ( + px_min_x, + px_min_y, + px_max_x.saturating_sub(px_min_x), + px_max_y.saturating_sub(px_min_y), + ) + } +} + +pub(super) fn should_use_discard_rect_clip( + clip_rect: LogicalClipRect, + canvas_logical_size: (f32, f32), +) -> bool { + let canvas_clip_rect = LogicalClipRect { + min_x: 0.0, + min_y: 0.0, + max_x: canvas_logical_size.0, + max_y: canvas_logical_size.1, + }; + let effective_clip_rect = clip_rect.intersect(canvas_clip_rect); + let clip_width = (effective_clip_rect.max_x - effective_clip_rect.min_x).max(0.0); + let clip_height = (effective_clip_rect.max_y - effective_clip_rect.min_y).max(0.0); + let canvas_area = (canvas_logical_size.0 * canvas_logical_size.1).max(1.0); + + clip_width * clip_height <= canvas_area * 0.25 +} + #[derive(Clone, Copy)] pub(super) struct AxisAlignedRectTransform { pub(super) scale_x: f32, @@ -66,17 +126,10 @@ pub(super) fn should_skip_visible_rect_draw( extract_axis_aligned_rect_transform(draw_command.transform()).is_some() } -/// Compute a screen-space scissor rect from a local-space axis-aligned rect and its transform. -/// -/// Returns `Some((x, y, width, height))` in physical pixels if the transform preserves -/// axis-alignment (identity, translation, and/or scale — no rotation, skew, or perspective). -/// Returns `None` if scissor clipping cannot be used (the caller should fall back to stencil). -pub(super) fn compute_scissor_rect( +pub(super) fn compute_logical_clip_rect( rect: [(f32, f32); 2], transform: Option, - scale_factor: f64, - physical_size: (u32, u32), -) -> Option<(u32, u32, u32, u32)> { +) -> Option { let axis_aligned_transform = extract_axis_aligned_rect_transform(transform)?; let x0 = rect[0].0 * axis_aligned_transform.scale_x + axis_aligned_transform.translate_x; @@ -84,25 +137,58 @@ pub(super) fn compute_scissor_rect( let x1 = rect[1].0 * axis_aligned_transform.scale_x + axis_aligned_transform.translate_x; let y1 = rect[1].1 * axis_aligned_transform.scale_y + axis_aligned_transform.translate_y; - let min_x = x0.min(x1); - let min_y = y0.min(y1); - let max_x = x0.max(x1); - let max_y = y0.max(y1); + Some(LogicalClipRect { + min_x: x0.min(x1), + min_y: y0.min(y1), + max_x: x0.max(x1), + max_y: y0.max(y1), + }) +} - let scale_factor = scale_factor as f32; - let px_min_x = ((min_x * scale_factor).floor().max(0.0) as u32).min(physical_size.0); - let px_min_y = ((min_y * scale_factor).floor().max(0.0) as u32).min(physical_size.1); - let px_max_x = (max_x * scale_factor).ceil().min(physical_size.0 as f32) as u32; - let px_max_y = (max_y * scale_factor).ceil().min(physical_size.1 as f32) as u32; +pub(super) fn try_logical_clip_rect_for_draw_command( + draw_command: &DrawCommand, +) -> Option { + if !draw_command.is_rect() { + return None; + } - let width = px_max_x.saturating_sub(px_min_x); - let height = px_max_y.saturating_sub(px_min_y); + let rect_bounds = draw_command.rect_bounds()?; + compute_logical_clip_rect(rect_bounds, draw_command.transform()) +} + +/// Compute a screen-space scissor rect from a local-space axis-aligned rect and its transform. +/// +/// Returns `Some((x, y, width, height))` in physical pixels if the transform preserves +/// axis-alignment (identity, translation, and/or scale — no rotation, skew, or perspective). +/// Returns `None` if scissor clipping cannot be used (the caller should fall back to stencil). +pub(super) fn compute_scissor_rect( + rect: [(f32, f32); 2], + transform: Option, + scale_factor: f64, + physical_size: (u32, u32), +) -> Option<(u32, u32, u32, u32)> { + Some( + compute_logical_clip_rect(rect, transform)? + .to_physical_scissor(scale_factor, physical_size), + ) +} - Some((px_min_x, px_min_y, width, height)) +/// Check whether a non-leaf draw command is eligible for scissor clipping, +/// and if so, compute the scissor rect. This centralizes the eligibility logic +/// so pre-visit and post-visit make the same deterministic decision. +pub(super) fn try_scissor_for_rect( + draw_command: &DrawCommand, + scale_factor: f64, + physical_size: (u32, u32), +) -> Option<(u32, u32, u32, u32)> { + if !draw_command.is_rect() { + return None; + } + let rect_bounds = draw_command.rect_bounds()?; + let transform = draw_command.transform(); + compute_scissor_rect(rect_bounds, transform, scale_factor, physical_size) } -/// Intersect two scissor rects, returning the overlapping region. -/// If the rects don't overlap, returns a zero-size rect. pub(super) fn intersect_scissor( a: (u32, u32, u32, u32), b: (u32, u32, u32, u32), @@ -123,25 +209,12 @@ pub(super) fn intersect_scissor( (left, top, width, height) } -/// Check whether a non-leaf draw command is eligible for scissor clipping, -/// and if so, compute the scissor rect. This centralizes the eligibility logic -/// so pre-visit and post-visit make the same deterministic decision. -pub(super) fn try_scissor_for_rect( - draw_command: &DrawCommand, - scale_factor: f64, - physical_size: (u32, u32), -) -> Option<(u32, u32, u32, u32)> { - if !draw_command.is_rect() { - return None; - } - let rect_bounds = draw_command.rect_bounds()?; - let transform = draw_command.transform(); - compute_scissor_rect(rect_bounds, transform, scale_factor, physical_size) -} - #[cfg(test)] mod tests { - use super::{compute_scissor_rect, should_skip_visible_rect_draw, try_scissor_for_rect}; + use super::{ + compute_logical_clip_rect, compute_scissor_rect, should_skip_visible_rect_draw, + try_logical_clip_rect_for_draw_command, try_scissor_for_rect, LogicalClipRect, + }; use crate::effect::EffectInstance; use crate::gradient::types::{ ColorInterpolation, Fill, Gradient, GradientStop, GradientStopOffset, LinearGradientDesc, @@ -184,6 +257,23 @@ mod tests { assert_eq!(scissor, Some((10, 5, 20, 15))); } + #[test] + fn logical_clip_rect_preserves_logical_coordinates() { + let transform = TransformInstance::affine_2d(2.0, 0.0, 0.0, -3.0, 10.0, 20.0); + + let clip_rect = compute_logical_clip_rect([(0.0, 0.0), (10.0, 5.0)], Some(transform)); + + assert_eq!( + clip_rect, + Some(LogicalClipRect { + min_x: 10.0, + min_y: 5.0, + max_x: 30.0, + max_y: 20.0, + }) + ); + } + #[test] fn scissor_rejects_non_axis_aligned_transform() { let mut draw_command = DrawCommand::Shape(crate::shape::ShapeDrawData::new( @@ -240,6 +330,25 @@ mod tests { )); } + #[test] + fn logical_clip_rect_for_draw_command_accepts_axis_aligned_rect() { + let mut draw_command = DrawCommand::Shape(crate::shape::ShapeDrawData::new( + Shape::rect([(0.0, 0.0), (10.0, 10.0)], Stroke::default()), + None, + )); + draw_command.set_transform(TransformInstance::translation(5.0, 8.0)); + + assert_eq!( + try_logical_clip_rect_for_draw_command(&draw_command), + Some(LogicalClipRect { + min_x: 5.0, + min_y: 8.0, + max_x: 15.0, + max_y: 18.0, + }) + ); + } + #[test] fn skip_visible_rect_draw_accepts_untextured_none_color_rect() { let draw_command = DrawCommand::Shape(crate::shape::ShapeDrawData::new( diff --git a/src/renderer/rendering.rs b/src/renderer/rendering.rs index feb60d4..96d8166 100644 --- a/src/renderer/rendering.rs +++ b/src/renderer/rendering.rs @@ -28,6 +28,7 @@ impl<'a> Renderer<'a> { let mut stencil_stack = std::mem::take(&mut self.scratch.stencil_stack); let skipped_stack = std::mem::take(&mut self.scratch.skipped_stack); let mut scissor_stack = std::mem::take(&mut self.scratch.scissor_stack); + let mut logical_clip_stack = std::mem::take(&mut self.scratch.logical_clip_stack); let mut clip_kind_stack = std::mem::take(&mut self.scratch.clip_kind_stack); let mut backdrop_work_textures = std::mem::take(&mut self.scratch.backdrop_work_textures); @@ -185,6 +186,7 @@ impl<'a> Renderer<'a> { &mut backdrop_work_textures, &mut stencil_stack, &mut scissor_stack, + &mut logical_clip_stack, &mut clip_kind_stack, self.scale_factor, self.physical_size, @@ -264,6 +266,7 @@ impl<'a> Renderer<'a> { &mut backdrop_work_textures, &mut stencil_stack, &mut scissor_stack, + &mut logical_clip_stack, &mut clip_kind_stack, scale_factor, physical_size, @@ -374,6 +377,7 @@ impl<'a> Renderer<'a> { &mut backdrop_work_textures, &mut stencil_stack, &mut scissor_stack, + &mut logical_clip_stack, &mut clip_kind_stack, self.scale_factor, self.physical_size, @@ -404,6 +408,7 @@ impl<'a> Renderer<'a> { self.scratch.stencil_stack = stencil_stack; self.scratch.skipped_stack = skipped_stack; self.scratch.scissor_stack = scissor_stack; + self.scratch.logical_clip_stack = logical_clip_stack; self.scratch.clip_kind_stack = clip_kind_stack; self.scratch.backdrop_work_textures = backdrop_work_textures; diff --git a/src/renderer/types.rs b/src/renderer/types.rs index 6caeb32..7e64e95 100644 --- a/src/renderer/types.rs +++ b/src/renderer/types.rs @@ -4,6 +4,7 @@ use ahash::{HashMap, HashMapExt}; use crate::effect::{self, EffectInstance, LoadedEffect}; use crate::gradient::types::Fill; +use crate::renderer::rect_utils::LogicalClipRect; use crate::shape::{CachedShapeDrawData, DrawShapeCommand, ShapeDrawData}; use crate::texture_manager::TextureManager; use crate::util::GradientCache; @@ -89,6 +90,13 @@ impl DrawCommand { } } + pub(super) fn instance_index(&self) -> Option { + match self { + DrawCommand::Shape(shape) => shape.instance_index(), + DrawCommand::CachedShape(cached_shape) => cached_shape.instance_index(), + } + } + pub(super) fn set_fill(&mut self, fill: Option) { match self { DrawCommand::Shape(shape) => shape.set_fill(fill), @@ -174,14 +182,36 @@ impl DrawCommand { } } + pub(super) fn set_prepared_geometry( + &mut self, + index_buffer_range: Option<(usize, usize)>, + is_empty: bool, + ) { + match self { + DrawCommand::Shape(shape) => { + shape.index_buffer_range = index_buffer_range; + shape.is_empty = is_empty; + } + DrawCommand::CachedShape(cached_shape) => { + cached_shape.index_buffer_range = index_buffer_range; + cached_shape.is_empty = is_empty; + } + } + } + + pub(super) fn set_instance_index(&mut self, instance_index: Option) { + match self { + DrawCommand::Shape(shape) => shape.instance_index = instance_index, + DrawCommand::CachedShape(cached_shape) => cached_shape.instance_index = instance_index, + } + } + pub(super) fn clear_frame_state(&mut self) { match self { DrawCommand::Shape(shape) => { - shape.index_buffer_range = None; shape.stencil_ref = None; } DrawCommand::CachedShape(cached_shape) => { - cached_shape.index_buffer_range = None; cached_shape.stencil_ref = None; } } @@ -213,6 +243,8 @@ pub(super) enum ClipKind { NonClipping, /// Parent clips children via hardware scissor rect. Scissor, + /// Parent clips children via inherited per-instance rect discard. + DiscardRectClip, /// Parent clips children via stencil increment/decrement. Stencil, } @@ -358,9 +390,11 @@ pub(super) struct RendererScratch { pub(super) effect_output_textures: Vec, pub(super) stencil_stack: Vec, pub(super) skipped_stack: Vec, - /// Stack of intersected scissor rects (x, y, width, height) in physical pixels. - /// Used to replace stencil clipping for axis-aligned rect parents. + /// Temporary scissor stack in physical pixels, used only for fullscreen effect + /// composite draws that cannot consume per-instance clip metadata. pub(super) scissor_stack: Vec<(u32, u32, u32, u32)>, + /// Stack of intersected logical clip rects inherited from axis-aligned clip parents. + pub(super) logical_clip_stack: Vec, /// Parallel stack to `stencil_stack`: records which clipping strategy each /// non-leaf parent used so the `Post` path avoids re-evaluating eligibility. pub(super) clip_kind_stack: Vec, @@ -381,6 +415,7 @@ impl RendererScratch { stencil_stack: Vec::new(), skipped_stack: Vec::new(), scissor_stack: Vec::new(), + logical_clip_stack: Vec::new(), clip_kind_stack: Vec::new(), backdrop_work_textures: Vec::new(), readback_bytes: Vec::new(), @@ -396,6 +431,7 @@ impl RendererScratch { self.stencil_stack.clear(); self.skipped_stack.clear(); self.scissor_stack.clear(); + self.logical_clip_stack.clear(); self.clip_kind_stack.clear(); self.backdrop_work_textures.clear(); self.traversal_scratch.begin(); @@ -414,6 +450,7 @@ impl RendererScratch { trim_vector_if_needed(&mut self.stencil_stack, MAX_STENCIL_STACK_CAPACITY); trim_vector_if_needed(&mut self.skipped_stack, MAX_SKIPPED_STACK_CAPACITY); trim_vector_if_needed(&mut self.scissor_stack, MAX_SCISSOR_STACK_CAPACITY); + trim_vector_if_needed(&mut self.logical_clip_stack, MAX_SCISSOR_STACK_CAPACITY); trim_vector_if_needed(&mut self.clip_kind_stack, MAX_SCISSOR_STACK_CAPACITY); trim_vector_if_needed( &mut self.backdrop_work_textures, @@ -445,6 +482,7 @@ where #[derive(Debug, Clone, Copy)] pub(super) struct BufferSizingDecision { pub(super) should_reallocate: bool, + pub(super) target_size: u64, } pub(super) fn decide_buffer_sizing( @@ -452,11 +490,23 @@ pub(super) fn decide_buffer_sizing( required_size: usize, ) -> BufferSizingDecision { let required_size = required_size as u64; - let should_reallocate = existing_size - .map(|size| size < required_size) - .unwrap_or(true); + let current_size = existing_size.unwrap_or(0); + let should_reallocate = current_size < required_size; - BufferSizingDecision { should_reallocate } + let target_size = if should_reallocate { + let mut next_size = current_size.max(256); + while next_size < required_size { + next_size = next_size.saturating_mul(2); + } + next_size + } else { + current_size + }; + + BufferSizingDecision { + should_reallocate, + target_size, + } } #[cfg(test)] @@ -467,18 +517,21 @@ mod tests { fn decide_buffer_sizing_reallocates_when_missing() { let decision = decide_buffer_sizing(None, 128); assert!(decision.should_reallocate); + assert!(decision.target_size >= 128); } #[test] fn decide_buffer_sizing_reallocates_when_too_small() { let decision = decide_buffer_sizing(Some(64), 128); assert!(decision.should_reallocate); + assert!(decision.target_size >= 128); } #[test] fn decide_buffer_sizing_keeps_buffer_when_large_enough() { let decision = decide_buffer_sizing(Some(512), 128); assert!(!decision.should_reallocate); + assert_eq!(decision.target_size, 512); } #[test] diff --git a/src/shaders/shader.wgsl b/src/shaders/shader.wgsl index 461184a..2b06e8c 100644 --- a/src/shaders/shader.wgsl +++ b/src/shaders/shader.wgsl @@ -19,6 +19,10 @@ struct VertexInput { // Per-instance bitmask: bit 0 = layer 0 active, bit 1 = layer 1 active. // 0 = solid fill only (skip all texture samples). @location(10) texture_flags: f32, + // Inherited axis-aligned clip rect in logical screen coordinates. + // `clip_rect_min.x > clip_rect_max.x` means "no inherited rect clip". + @location(11) clip_rect_min: vec2, + @location(12) clip_rect_max: vec2, }; struct VertexOutput { @@ -27,6 +31,9 @@ struct VertexOutput { @location(1) tex_coords: vec2, @location(2) coverage: f32, @location(3) @interpolate(flat) texture_flags: f32, + @location(4) screen_pos: vec2, + @location(5) @interpolate(flat) clip_rect_min: vec2, + @location(6) @interpolate(flat) clip_rect_max: vec2, }; struct GradientVertexOutput { @@ -39,6 +46,13 @@ struct GradientVertexOutput { @location(4) model_pos: vec2, // Screen-space position (pixel coordinates, after transform) @location(5) screen_pos: vec2, + @location(6) @interpolate(flat) clip_rect_min: vec2, + @location(7) @interpolate(flat) clip_rect_max: vec2, +}; + +struct PositionedVertex { + clip_position: vec4, + screen_pos: vec2, }; // This is a struct that will be used for position normalization @@ -188,7 +202,7 @@ fn evaluate_gradient(model_pos: vec2, screen_pos: vec2) -> vec4 { return textureSampleLevel(t_gradient_ramp, s_gradient_ramp, uv, 0.0); } -fn compute_vertex_position(input: VertexInput) -> vec4 { +fn compute_positioned_vertex(input: VertexInput) -> PositionedVertex { // Build the transform matrix from column-major CPU data. // Each vec4 (t_col0..t_col3) is one column of the matrix. WGSL's mat4x4 // constructor treats each argument as a column, so this is a direct mapping. @@ -266,57 +280,56 @@ fn compute_vertex_position(input: VertexInput) -> vec4 { // I don't have a particular use case for it right now, so I'm leaving it as is. // If you want to enable intersection without transparency, change the pipeline to enable depth test/write with // less-equal function. (set depth_compare: wgpu::CompareFunction::LessEqual on the stencil/depth state) - return vec4(ndc_x, ndc_y, biased_depth, 1.0); + return PositionedVertex( + vec4(ndc_x, ndc_y, biased_depth, 1.0), + vec2(final_px, final_py), + ); } @vertex fn vs_main(input: VertexInput) -> VertexOutput { var output: VertexOutput; - output.position = compute_vertex_position(input); + let positioned_vertex = compute_positioned_vertex(input); + output.position = positioned_vertex.clip_position; output.color = input.color; output.tex_coords = input.tex_coords; output.coverage = input.coverage; output.texture_flags = input.texture_flags; + output.screen_pos = positioned_vertex.screen_pos; + output.clip_rect_min = input.clip_rect_min; + output.clip_rect_max = input.clip_rect_max; return output; } @vertex fn vs_main_gradient(input: VertexInput) -> GradientVertexOutput { var output: GradientVertexOutput; - output.position = compute_vertex_position(input); + let positioned_vertex = compute_positioned_vertex(input); + output.position = positioned_vertex.clip_position; output.color = input.color; output.tex_coords = input.tex_coords; output.coverage = input.coverage; output.texture_flags = input.texture_flags; output.model_pos = input.position; + output.screen_pos = positioned_vertex.screen_pos; + output.clip_rect_min = input.clip_rect_min; + output.clip_rect_max = input.clip_rect_max; + return output; +} - let model: mat4x4 = mat4x4(input.t_col0, input.t_col1, input.t_col2, input.t_col3); - let p = model * vec4(input.position, 0.0, 1.0); - let invw = 1.0 / max(abs(p.w), 1e-6); - let px = p.x * invw; - let py = p.y * invw; - - var final_px = px; - var final_py = py; - if (input.coverage < 1.0) { - let epsilon = 0.01; - let p2 = model * vec4(input.position + input.normal * epsilon, 0.0, 1.0); - let invw2 = 1.0 / max(abs(p2.w), 1e-6); - let px2 = p2.x * invw2; - let py2 = p2.y * invw2; - let screen_dir = vec2(px2 - px, py2 - py); - let screen_len = length(screen_dir); - - if (screen_len > 1e-8) { - let unit_dir = screen_dir / screen_len; - let fringe_width = uniforms.fringe_width / uniforms.scale_factor; - final_px = px + unit_dir.x * fringe_width; - final_py = py + unit_dir.y * fringe_width; - } +fn should_discard_for_clip_rect( + screen_pos: vec2, + clip_rect_min: vec2, + clip_rect_max: vec2, +) -> bool { + if clip_rect_min.x > clip_rect_max.x || clip_rect_min.y > clip_rect_max.y { + return false; } - output.screen_pos = vec2(final_px, final_py); - return output; + return screen_pos.x < clip_rect_min.x + || screen_pos.y < clip_rect_min.y + || screen_pos.x >= clip_rect_max.x + || screen_pos.y >= clip_rect_max.y; } // Computes the final premultiplied color for a fragment given fill color, texture @@ -398,7 +411,13 @@ fn fs_main( @location(1) tex_coords: vec2, @location(2) coverage: f32, @location(3) @interpolate(flat) texture_flags: f32, + @location(4) screen_pos: vec2, + @location(5) @interpolate(flat) clip_rect_min: vec2, + @location(6) @interpolate(flat) clip_rect_max: vec2, ) -> @location(0) vec4 { + if should_discard_for_clip_rect(screen_pos, clip_rect_min, clip_rect_max) { + discard; + } return compute_fragment_color(color, tex_coords, coverage, texture_flags); } @@ -410,15 +429,27 @@ fn fs_main_gradient( @location(3) @interpolate(flat) texture_flags: f32, @location(4) model_pos: vec2, @location(5) screen_pos: vec2, + @location(6) @interpolate(flat) clip_rect_min: vec2, + @location(7) @interpolate(flat) clip_rect_max: vec2, ) -> @location(0) vec4 { + if should_discard_for_clip_rect(screen_pos, clip_rect_min, clip_rect_max) { + discard; + } return compute_gradient_fragment_color(tex_coords, coverage, texture_flags, model_pos, screen_pos); } // Used by stencil-only passes that write no color. Color work is skipped entirely; // only the fixed-function stencil operation matters for these draws. -// NOTE: do not add discard here — that would also kill the stencil write. +// Inherited rect clips still need to suppress stencil writes outside the clip. @fragment -fn fs_stencil_only() -> @location(0) vec4 { +fn fs_stencil_only( + @location(4) screen_pos: vec2, + @location(5) @interpolate(flat) clip_rect_min: vec2, + @location(6) @interpolate(flat) clip_rect_max: vec2, +) -> @location(0) vec4 { + if should_discard_for_clip_rect(screen_pos, clip_rect_min, clip_rect_max) { + discard; + } return vec4(0.0, 0.0, 0.0, 0.0); } @@ -431,7 +462,13 @@ fn fs_passthrough( @location(1) tex_coords: vec2, @location(2) coverage: f32, @location(3) @interpolate(flat) texture_flags: f32, + @location(4) screen_pos: vec2, + @location(5) @interpolate(flat) clip_rect_min: vec2, + @location(6) @interpolate(flat) clip_rect_max: vec2, ) -> @location(0) vec4 { + if should_discard_for_clip_rect(screen_pos, clip_rect_min, clip_rect_max) { + discard; + } return compute_fragment_color(color, tex_coords, coverage, texture_flags); } @@ -443,6 +480,11 @@ fn fs_passthrough_gradient( @location(3) @interpolate(flat) texture_flags: f32, @location(4) model_pos: vec2, @location(5) screen_pos: vec2, + @location(6) @interpolate(flat) clip_rect_min: vec2, + @location(7) @interpolate(flat) clip_rect_max: vec2, ) -> @location(0) vec4 { + if should_discard_for_clip_rect(screen_pos, clip_rect_min, clip_rect_max) { + discard; + } return compute_gradient_fragment_color(tex_coords, coverage, texture_flags, model_pos, screen_pos); } diff --git a/src/shape.rs b/src/shape.rs index affb547..a4abccf 100644 --- a/src/shape.rs +++ b/src/shape.rs @@ -1474,7 +1474,6 @@ pub(crate) trait DrawShapeCommand { fn index_buffer_range(&self) -> Option<(usize, usize)>; // (start_index, index_count) fn is_empty(&self) -> bool; fn stencil_ref_mut(&mut self) -> &mut Option; - fn instance_index_mut(&mut self) -> &mut Option; fn instance_index(&self) -> Option; fn transform(&self) -> Option; fn set_transform(&mut self, t: InstanceTransform); @@ -1506,11 +1505,6 @@ impl DrawShapeCommand for ShapeDrawData { &mut self.stencil_ref } - #[inline] - fn instance_index_mut(&mut self) -> &mut Option { - &mut self.instance_index - } - #[inline] fn instance_index(&self) -> Option { self.instance_index @@ -1601,11 +1595,6 @@ impl DrawShapeCommand for CachedShapeDrawData { &mut self.stencil_ref } - #[inline] - fn instance_index_mut(&mut self) -> &mut Option { - &mut self.instance_index - } - #[inline] fn instance_index(&self) -> Option { self.instance_index diff --git a/src/vertex.rs b/src/vertex.rs index aa297cc..d928cc0 100644 --- a/src/vertex.rs +++ b/src/vertex.rs @@ -292,6 +292,11 @@ pub struct InstanceMetadata { /// Bitmask indicating which texture layers are active for this instance. /// 0 = no textures (solid fill only), 1 = layer 0, 2 = layer 1, 3 = both. pub texture_flags: f32, + /// Inclusive min logical clip coordinate inherited from axis-aligned clip ancestors. + /// An invalid range (`min > max`) means "no inherited rect clip". + pub clip_rect_min: [f32; 2], + /// Exclusive max logical clip coordinate inherited from axis-aligned clip ancestors. + pub clip_rect_max: [f32; 2], } impl Default for InstanceMetadata { @@ -299,6 +304,8 @@ impl Default for InstanceMetadata { Self { draw_order: 0.0, texture_flags: 0.0, + clip_rect_min: [1.0, 1.0], + clip_rect_max: [0.0, 0.0], } } } @@ -319,6 +326,16 @@ impl InstanceMetadata { offset: std::mem::size_of::() as wgpu::BufferAddress, shader_location: 10, }, + wgpu::VertexAttribute { + format: wgpu::VertexFormat::Float32x2, + offset: (std::mem::size_of::() * 2) as wgpu::BufferAddress, + shader_location: 11, + }, + wgpu::VertexAttribute { + format: wgpu::VertexFormat::Float32x2, + offset: (std::mem::size_of::() * 4) as wgpu::BufferAddress, + shader_location: 12, + }, ], } }