diff --git a/servers/rendering/renderer_rd/effects/taa.cpp b/servers/rendering/renderer_rd/effects/taa.cpp index 84bb43a14eb5..2f91d203bef4 100644 --- a/servers/rendering/renderer_rd/effects/taa.cpp +++ b/servers/rendering/renderer_rd/effects/taa.cpp @@ -47,7 +47,7 @@ TAA::~TAA() { taa_shader.version_free(shader_version); } -void TAA::resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_prev_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far) { +void TAA::resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far) { UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); ERR_FAIL_NULL(uniform_set_cache); MaterialStorage *material_storage = MaterialStorage::get_singleton(); @@ -76,11 +76,10 @@ void TAA::resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_pr RD::Uniform u_frame_source(RD::UNIFORM_TYPE_IMAGE, 0, { p_frame }); RD::Uniform u_depth(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 1, { default_sampler, p_depth }); RD::Uniform u_velocity(RD::UNIFORM_TYPE_IMAGE, 2, { p_velocity }); - RD::Uniform u_prev_velocity(RD::UNIFORM_TYPE_IMAGE, 3, { p_prev_velocity }); - RD::Uniform u_history(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 4, { default_sampler, p_history }); - RD::Uniform u_frame_dest(RD::UNIFORM_TYPE_IMAGE, 5, { p_temp }); + RD::Uniform u_history(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 3, { default_sampler, p_history }); + RD::Uniform u_frame_dest(RD::UNIFORM_TYPE_IMAGE, 4, { p_temp }); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_frame_source, u_depth, u_velocity, u_prev_velocity, u_history, u_frame_dest), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_frame_source, u_depth, u_velocity, u_history, u_frame_dest), 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(TAAResolvePushConstant)); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_resolution.width, p_resolution.height, 1); RD::get_singleton()->compute_list_end(); @@ -91,7 +90,6 @@ void TAA::process(Ref p_render_buffers, RD::DataFormat p_f uint32_t view_count = p_render_buffers->get_view_count(); Size2i internal_size = p_render_buffers->get_internal_size(); - Size2i target_size = p_render_buffers->get_target_size(); bool just_allocated = false; if (!p_render_buffers->has_texture(SNAME("taa"), SNAME("history"))) { @@ -100,8 +98,6 @@ void TAA::process(Ref p_render_buffers, RD::DataFormat p_f p_render_buffers->create_texture(SNAME("taa"), SNAME("history"), p_format, usage_bits); p_render_buffers->create_texture(SNAME("taa"), SNAME("temp"), p_format, usage_bits); - p_render_buffers->create_texture(SNAME("taa"), SNAME("prev_velocity"), RD::DATA_FORMAT_R16G16_SFLOAT, usage_bits); - just_allocated = true; } @@ -112,17 +108,15 @@ void TAA::process(Ref p_render_buffers, RD::DataFormat p_f RID internal_texture = p_render_buffers->get_internal_texture(v); RID velocity_buffer = p_render_buffers->get_velocity_buffer(false, v); RID taa_history = p_render_buffers->get_texture_slice(SNAME("taa"), SNAME("history"), v, 0); - RID taa_prev_velocity = p_render_buffers->get_texture_slice(SNAME("taa"), SNAME("prev_velocity"), v, 0); if (!just_allocated) { RID depth_texture = p_render_buffers->get_depth_texture(v); RID taa_temp = p_render_buffers->get_texture_slice(SNAME("taa"), SNAME("temp"), v, 0); - resolve(internal_texture, taa_temp, depth_texture, velocity_buffer, taa_prev_velocity, taa_history, Size2(internal_size.x, internal_size.y), p_z_near, p_z_far); + resolve(internal_texture, taa_temp, depth_texture, velocity_buffer, taa_history, Size2(internal_size.x, internal_size.y), p_z_near, p_z_far); copy_effects->copy_to_rect(taa_temp, internal_texture, Rect2(0, 0, internal_size.x, internal_size.y)); } copy_effects->copy_to_rect(internal_texture, taa_history, Rect2(0, 0, internal_size.x, internal_size.y)); - copy_effects->copy_to_rect(velocity_buffer, taa_prev_velocity, Rect2(0, 0, target_size.x, target_size.y)); } RD::get_singleton()->draw_command_end_label(); diff --git a/servers/rendering/renderer_rd/effects/taa.h b/servers/rendering/renderer_rd/effects/taa.h index 06075c66da15..fe73cba5ce5c 100644 --- a/servers/rendering/renderer_rd/effects/taa.h +++ b/servers/rendering/renderer_rd/effects/taa.h @@ -54,7 +54,7 @@ class TAA { RID shader_version; RID pipeline; - void resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_prev_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far); + void resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far); }; } // namespace RendererRD diff --git a/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl b/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl index 94bc126d91bb..f9d5d80f0ca1 100644 --- a/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl @@ -19,6 +19,7 @@ // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /////////////////////////////////////////////////////////////////////////////////// // File changes (yyyy-mm-dd) +// 2025-12-02: Rene Prašnikar: Changed history clipping, changed disocclusion logic, removed anti-flicker algorithm, removed tonemapping step, added basic background handling // 2025-11-05: Jakub Brzyski: Added dynamic variance, base variance value adjusted to reduce ghosting // 2022-05-06: Panos Karabelas: first commit // 2020-12-05: Joan Fons: convert to Vulkan and Godot @@ -46,9 +47,8 @@ layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = 1) i layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D color_buffer; layout(set = 0, binding = 1) uniform sampler2D depth_buffer; layout(rg16f, set = 0, binding = 2) uniform restrict readonly image2D velocity_buffer; -layout(rg16f, set = 0, binding = 3) uniform restrict readonly image2D last_velocity_buffer; -layout(set = 0, binding = 4) uniform sampler2D history_buffer; -layout(rgba16f, set = 0, binding = 5) uniform restrict writeonly image2D output_buffer; +layout(set = 0, binding = 3) uniform sampler2D history_buffer; +layout(rgba16f, set = 0, binding = 4) uniform restrict writeonly image2D output_buffer; layout(push_constant, std430) uniform Params { vec2 resolution; @@ -78,13 +78,6 @@ const int kGroupSize = GROUP_SIZE; const int kTileDimension = kGroupSize + kBorderSize * 2; const int kTileDimension2 = kTileDimension * kTileDimension; -vec3 reinhard(vec3 hdr) { - return hdr / (hdr + 1.0); -} -vec3 reinhard_inverse(vec3 sdr) { - return sdr / (1.0 - sdr); -} - float get_depth(ivec2 thread_id) { return texelFetch(depth_buffer, thread_id, 0).r; } @@ -138,38 +131,6 @@ void populate_group_shared_memory(uvec2 group_id, uint group_index) { barrier(); } -/*------------------------------------------------------------------------------ - VELOCITY -------------------------------------------------------------------------------*/ - -void depth_test_min(uvec2 pos, inout float min_depth, inout uvec2 min_pos) { - float depth = load_depth(pos); - - if (depth < min_depth) { - min_depth = depth; - min_pos = pos; - } -} - -// Returns velocity with closest depth (3x3 neighborhood) -void get_closest_pixel_velocity_3x3(in uvec2 group_pos, uvec2 group_top_left, out vec2 velocity) { - float min_depth = 1.0; - uvec2 min_pos = group_pos; - - depth_test_min(group_pos + kOffsets3x3[0], min_depth, min_pos); - depth_test_min(group_pos + kOffsets3x3[1], min_depth, min_pos); - depth_test_min(group_pos + kOffsets3x3[2], min_depth, min_pos); - depth_test_min(group_pos + kOffsets3x3[3], min_depth, min_pos); - depth_test_min(group_pos + kOffsets3x3[4], min_depth, min_pos); - depth_test_min(group_pos + kOffsets3x3[5], min_depth, min_pos); - depth_test_min(group_pos + kOffsets3x3[6], min_depth, min_pos); - depth_test_min(group_pos + kOffsets3x3[7], min_depth, min_pos); - depth_test_min(group_pos + kOffsets3x3[8], min_depth, min_pos); - - // Velocity out - velocity = imageLoad(velocity_buffer, ivec2(group_top_left + min_pos)).xy; -} - /*------------------------------------------------------------------------------ HISTORY SAMPLING ------------------------------------------------------------------------------*/ @@ -231,37 +192,8 @@ vec3 sample_catmull_rom_9(sampler2D stex, vec2 uv, vec2 resolution) { HISTORY CLIPPING ------------------------------------------------------------------------------*/ -// Based on "Temporal Reprojection Anti-Aliasing" - https://github.com/playdeadgames/temporal -vec3 clip_aabb(vec3 aabb_min, vec3 aabb_max, vec3 p, vec3 q) { - vec3 r = q - p; - vec3 rmax = (aabb_max - p.xyz); - vec3 rmin = (aabb_min - p.xyz); - - if (r.x > rmax.x + FLT_MIN) { - r *= (rmax.x / r.x); - } - if (r.y > rmax.y + FLT_MIN) { - r *= (rmax.y / r.y); - } - if (r.z > rmax.z + FLT_MIN) { - r *= (rmax.z / r.z); - } - - if (r.x < rmin.x - FLT_MIN) { - r *= (rmin.x / r.x); - } - if (r.y < rmin.y - FLT_MIN) { - r *= (rmin.y / r.y); - } - if (r.z < rmin.z - FLT_MIN) { - r *= (rmin.z / r.z); - } - - return p + r; -} - // Clip history to the neighbourhood of the current sample -vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history, vec2 velocity_closest) { +vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history) { // Sample a 3x3 neighbourhood vec3 s1 = load_color(group_pos + kOffsets3x3[0]); vec3 s2 = load_color(group_pos + kOffsets3x3[1]); @@ -273,17 +205,10 @@ vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history, vec2 velocity_closest vec3 s8 = load_color(group_pos + kOffsets3x3[7]); vec3 s9 = load_color(group_pos + kOffsets3x3[8]); - // Compute min and max (with an adaptive box size, which greatly reduces ghosting) - vec3 color_avg = (s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9) * RPC_9; - vec3 color_avg2 = ((s1 * s1) + (s2 * s2) + (s3 * s3) + (s4 * s4) + (s5 * s5) + (s6 * s6) + (s7 * s7) + (s8 * s8) + (s9 * s9)) * RPC_9; - // Use variance clipping as described in https://developer.download.nvidia.com/gameworks/events/GDC2016/msalvi_temporal_supersampling.pdf - float box_size = mix(0.0f, params.variance_dynamic, smoothstep(0.02f, 0.0f, length(velocity_closest))); - vec3 dev = sqrt(abs(color_avg2 - (color_avg * color_avg))) * box_size; - vec3 color_min = color_avg - dev; - vec3 color_max = color_avg + dev; + vec3 color_min = min(s1, min(s2, min(s3, min(s4, min(s5, min(s6, min(s7, min(s8, s9)))))))); + vec3 color_max = max(s1, max(s2, max(s3, max(s4, max(s5, max(s6, max(s7, max(s8, s9)))))))); - // Variance clipping - vec3 color = clip_aabb(color_min, color_max, clamp(color_avg, color_min, color_max), color_history); + vec3 color = clamp(color_history, color_min, color_max); // Clamp to prevent NaNs color = clamp(color, FLT_MIN, FLT_MAX); @@ -291,24 +216,35 @@ vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history, vec2 velocity_closest return color; } +// Quickly converge when rendering only the background to avoid darkening high frequency background detail, like stars +vec3 background_detection(uvec2 pos_group, vec3 resolve, vec3 color_history, vec3 color_input, float blend_factor) { + float d1 = load_depth(pos_group + kOffsets3x3[0]); + float d2 = load_depth(pos_group + kOffsets3x3[1]); + float d3 = load_depth(pos_group + kOffsets3x3[2]); + float d4 = load_depth(pos_group + kOffsets3x3[3]); + float d5 = load_depth(pos_group + kOffsets3x3[4]); + float d6 = load_depth(pos_group + kOffsets3x3[5]); + float d7 = load_depth(pos_group + kOffsets3x3[6]); + float d8 = load_depth(pos_group + kOffsets3x3[7]); + float d9 = load_depth(pos_group + kOffsets3x3[8]); + float depth = d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8 + d9; + if (depth == 0) { + resolve = clamp(mix(color_history, color_input, max(0.5, blend_factor)), FLT_MIN, FLT_MAX); + } + return resolve; +} + /*------------------------------------------------------------------------------ TAA ------------------------------------------------------------------------------*/ -const vec3 lumCoeff = vec3(0.299f, 0.587f, 0.114f); - -float luminance(vec3 color) { - return max(dot(color, lumCoeff), 0.0001f); -} - // This is "velocity disocclusion" as described by https://www.elopezr.com/temporal-aa-and-the-quest-for-the-holy-trail/. // We use texel space, so our scale and threshold differ. -float get_factor_disocclusion(vec2 uv_reprojected, vec2 velocity) { - vec2 velocity_previous = imageLoad(last_velocity_buffer, ivec2(uv_reprojected * params.resolution)).xy; - vec2 velocity_texels = velocity * params.resolution; - vec2 prev_velocity_texels = velocity_previous * params.resolution; - float disocclusion = length(prev_velocity_texels - velocity_texels) - params.disocclusion_threshold; - return clamp(disocclusion * DISOCCLUSION_SCALE, 0.0, 1.0); +float get_factor_disocclusion(vec2 uv, vec2 uv_reprojected) { + vec2 velocity_current = imageLoad(velocity_buffer, ivec2(uv * params.resolution)).xy * params.resolution; + vec2 velocity_previous = imageLoad(velocity_buffer, ivec2(uv_reprojected * params.resolution)).xy * params.resolution; + float disocclusion = length(velocity_current - velocity_previous); + return clamp(disocclusion, 0.0, 1.0); } vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_screen, vec2 uv, sampler2D tex_history) { @@ -324,11 +260,6 @@ vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_ // Get history color (catmull-rom reduces a lot of the blurring that you get under motion) vec3 color_history = sample_catmull_rom_9(tex_history, uv_reprojected, params.resolution).rgb; - // Clip history to the neighbourhood of the current sample (fixes a lot of the ghosting). - vec2 velocity_closest = vec2(0.0); // This is best done by using the velocity with the closest depth. - get_closest_pixel_velocity_3x3(pos_group, pos_group_top_left, velocity_closest); - color_history = clip_history_3x3(pos_group, color_history, velocity_closest); - // Compute blend factor float blend_factor = RPC_16; // We want to be able to accumulate as many jitter samples as we generated, that is, 16. { @@ -336,33 +267,18 @@ vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_ float factor_screen = any(lessThan(uv_reprojected, vec2(0.0))) || any(greaterThan(uv_reprojected, vec2(1.0))) ? 1.0 : 0.0; // Increase blend factor when there is disocclusion (fixes a lot of the remaining ghosting). - float factor_disocclusion = get_factor_disocclusion(uv_reprojected, velocity); + float factor_disocclusion = get_factor_disocclusion(uv, uv_reprojected); // Add to the blend factor blend_factor = clamp(blend_factor + factor_screen + factor_disocclusion, 0.0, 1.0); } // Resolve - vec3 color_resolved = vec3(0.0); - { - // Tonemap - color_history = reinhard(color_history); - color_input = reinhard(color_input); - - // Reduce flickering - float lum_color = luminance(color_input); - float lum_history = luminance(color_history); - float diff = abs(lum_color - lum_history) / max(lum_color, max(lum_history, 1.001)); - diff = 1.0 - diff; - diff = diff * diff; - blend_factor = mix(0.0, blend_factor, diff); - - // Lerp/blend - color_resolved = mix(color_history, color_input, blend_factor); - - // Inverse tonemap - color_resolved = reinhard_inverse(color_resolved); - } + vec3 color_resolved = clamp(mix(color_history, color_input, blend_factor), FLT_MIN, FLT_MAX); + + color_resolved = background_detection(pos_group, color_resolved, color_history, color_input, blend_factor); + + color_resolved = clip_history_3x3(pos_group, color_resolved); return color_resolved; }