Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 5 additions & 11 deletions servers/rendering/renderer_rd/effects/taa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ TAA::~TAA() {
taa_shader.version_free(shader_version);
}

void TAA::resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_prev_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far) {
void TAA::resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far) {
UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
ERR_FAIL_NULL(uniform_set_cache);
MaterialStorage *material_storage = MaterialStorage::get_singleton();
Expand Down Expand Up @@ -76,11 +76,10 @@ void TAA::resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_pr
RD::Uniform u_frame_source(RD::UNIFORM_TYPE_IMAGE, 0, { p_frame });
RD::Uniform u_depth(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 1, { default_sampler, p_depth });
RD::Uniform u_velocity(RD::UNIFORM_TYPE_IMAGE, 2, { p_velocity });
RD::Uniform u_prev_velocity(RD::UNIFORM_TYPE_IMAGE, 3, { p_prev_velocity });
RD::Uniform u_history(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 4, { default_sampler, p_history });
RD::Uniform u_frame_dest(RD::UNIFORM_TYPE_IMAGE, 5, { p_temp });
RD::Uniform u_history(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 3, { default_sampler, p_history });
RD::Uniform u_frame_dest(RD::UNIFORM_TYPE_IMAGE, 4, { p_temp });

RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_frame_source, u_depth, u_velocity, u_prev_velocity, u_history, u_frame_dest), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_frame_source, u_depth, u_velocity, u_history, u_frame_dest), 0);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(TAAResolvePushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_resolution.width, p_resolution.height, 1);
RD::get_singleton()->compute_list_end();
Expand All @@ -91,7 +90,6 @@ void TAA::process(Ref<RenderSceneBuffersRD> p_render_buffers, RD::DataFormat p_f

uint32_t view_count = p_render_buffers->get_view_count();
Size2i internal_size = p_render_buffers->get_internal_size();
Size2i target_size = p_render_buffers->get_target_size();

bool just_allocated = false;
if (!p_render_buffers->has_texture(SNAME("taa"), SNAME("history"))) {
Expand All @@ -100,8 +98,6 @@ void TAA::process(Ref<RenderSceneBuffersRD> p_render_buffers, RD::DataFormat p_f
p_render_buffers->create_texture(SNAME("taa"), SNAME("history"), p_format, usage_bits);
p_render_buffers->create_texture(SNAME("taa"), SNAME("temp"), p_format, usage_bits);

p_render_buffers->create_texture(SNAME("taa"), SNAME("prev_velocity"), RD::DATA_FORMAT_R16G16_SFLOAT, usage_bits);

just_allocated = true;
}

Expand All @@ -112,17 +108,15 @@ void TAA::process(Ref<RenderSceneBuffersRD> p_render_buffers, RD::DataFormat p_f
RID internal_texture = p_render_buffers->get_internal_texture(v);
RID velocity_buffer = p_render_buffers->get_velocity_buffer(false, v);
RID taa_history = p_render_buffers->get_texture_slice(SNAME("taa"), SNAME("history"), v, 0);
RID taa_prev_velocity = p_render_buffers->get_texture_slice(SNAME("taa"), SNAME("prev_velocity"), v, 0);

if (!just_allocated) {
RID depth_texture = p_render_buffers->get_depth_texture(v);
RID taa_temp = p_render_buffers->get_texture_slice(SNAME("taa"), SNAME("temp"), v, 0);
resolve(internal_texture, taa_temp, depth_texture, velocity_buffer, taa_prev_velocity, taa_history, Size2(internal_size.x, internal_size.y), p_z_near, p_z_far);
resolve(internal_texture, taa_temp, depth_texture, velocity_buffer, taa_history, Size2(internal_size.x, internal_size.y), p_z_near, p_z_far);
copy_effects->copy_to_rect(taa_temp, internal_texture, Rect2(0, 0, internal_size.x, internal_size.y));
}

copy_effects->copy_to_rect(internal_texture, taa_history, Rect2(0, 0, internal_size.x, internal_size.y));
copy_effects->copy_to_rect(velocity_buffer, taa_prev_velocity, Rect2(0, 0, target_size.x, target_size.y));
}

RD::get_singleton()->draw_command_end_label();
Expand Down
2 changes: 1 addition & 1 deletion servers/rendering/renderer_rd/effects/taa.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class TAA {
RID shader_version;
RID pipeline;

void resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_prev_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far);
void resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far);
};

} // namespace RendererRD
156 changes: 36 additions & 120 deletions servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////////////////////////////////////////////
// File changes (yyyy-mm-dd)
// 2025-12-02: Rene Prašnikar: Changed history clipping, changed disocclusion logic, removed anti-flicker algorithm, removed tonemapping step, added basic background handling
// 2025-11-05: Jakub Brzyski: Added dynamic variance, base variance value adjusted to reduce ghosting
// 2022-05-06: Panos Karabelas: first commit
// 2020-12-05: Joan Fons: convert to Vulkan and Godot
Expand Down Expand Up @@ -46,9 +47,8 @@ layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = 1) i
layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D color_buffer;
layout(set = 0, binding = 1) uniform sampler2D depth_buffer;
layout(rg16f, set = 0, binding = 2) uniform restrict readonly image2D velocity_buffer;
layout(rg16f, set = 0, binding = 3) uniform restrict readonly image2D last_velocity_buffer;
layout(set = 0, binding = 4) uniform sampler2D history_buffer;
layout(rgba16f, set = 0, binding = 5) uniform restrict writeonly image2D output_buffer;
layout(set = 0, binding = 3) uniform sampler2D history_buffer;
layout(rgba16f, set = 0, binding = 4) uniform restrict writeonly image2D output_buffer;

layout(push_constant, std430) uniform Params {
vec2 resolution;
Expand Down Expand Up @@ -78,13 +78,6 @@ const int kGroupSize = GROUP_SIZE;
const int kTileDimension = kGroupSize + kBorderSize * 2;
const int kTileDimension2 = kTileDimension * kTileDimension;

// Reinhard tonemapping operator, applied per channel: x / (x + 1).
// For non-negative input each channel of the result lies in [0, 1).
vec3 reinhard(vec3 hdr) {
	vec3 denominator = hdr + 1.0;
	return hdr / denominator;
}
// Inverse of the per-channel Reinhard operator: y / (1 - y).
// Note that a channel equal to exactly 1.0 divides by zero here.
vec3 reinhard_inverse(vec3 sdr) {
	vec3 headroom = 1.0 - sdr;
	return sdr / headroom;
}

// Fetches the red channel of depth_buffer at the given integer texel
// coordinate (mip level 0, no filtering).
float get_depth(ivec2 thread_id) {
	vec4 texel = texelFetch(depth_buffer, thread_id, 0);
	return texel.r;
}
Expand Down Expand Up @@ -138,38 +131,6 @@ void populate_group_shared_memory(uvec2 group_id, uint group_index) {
barrier();
}

/*------------------------------------------------------------------------------
VELOCITY
------------------------------------------------------------------------------*/

// Running-minimum helper: if the depth loaded at `pos` is strictly smaller
// than `min_depth`, both `min_depth` and `min_pos` are replaced by the new
// candidate; otherwise they are left untouched.
void depth_test_min(uvec2 pos, inout float min_depth, inout uvec2 min_pos) {
	float candidate = load_depth(pos);
	bool is_smaller = candidate < min_depth;

	min_pos = is_smaller ? pos : min_pos;
	min_depth = is_smaller ? candidate : min_depth;
}

// Outputs the velocity of the 3x3 neighbour with the smallest depth.
//   group_pos      - position of the centre sample, in the coordinates load_depth expects.
//   group_top_left - offset added to the winning position to form the velocity_buffer texel.
//   velocity       - receives the .xy of the selected velocity texel.
void get_closest_pixel_velocity_3x3(in uvec2 group_pos, uvec2 group_top_left, out vec2 velocity) {
	// Start from the centre position; only depths below 1.0 can replace it.
	uvec2 min_pos = group_pos;
	float min_depth = 1.0;

	// Visit the nine neighbourhood offsets in their declared order.
	for (int i = 0; i < 9; i++) {
		depth_test_min(group_pos + kOffsets3x3[i], min_depth, min_pos);
	}

	// Velocity out
	ivec2 closest_texel = ivec2(group_top_left + min_pos);
	velocity = imageLoad(velocity_buffer, closest_texel).xy;
}

/*------------------------------------------------------------------------------
HISTORY SAMPLING
------------------------------------------------------------------------------*/
Expand Down Expand Up @@ -231,37 +192,8 @@ vec3 sample_catmull_rom_9(sampler2D stex, vec2 uv, vec2 resolution) {
HISTORY CLIPPING
------------------------------------------------------------------------------*/

// Based on "Temporal Reprojection Anti-Aliasing" - https://github.com/playdeadgames/temporal
// Clips the segment from `p` towards `q` against the axis-aligned box
// [aabb_min, aabb_max] and returns the resulting point `p + r`.
//   aabb_min, aabb_max - corners of the box.
//   p                  - start point of the segment (the caller is presumably expected
//                        to pass a point inside the box - TODO confirm).
//   q                  - end point of the segment (the value being clipped).
// Each test below rescales the WHOLE offset vector `r`, not just one component,
// so the direction from `p` to `q` is preserved. Later tests see the already
// rescaled `r`, which makes the statement order significant.
vec3 clip_aabb(vec3 aabb_min, vec3 aabb_max, vec3 p, vec3 q) {
// Offset from p to q, and the offsets from p to the upper/lower box faces.
vec3 r = q - p;
vec3 rmax = (aabb_max - p.xyz);
vec3 rmin = (aabb_min - p.xyz);

// Upper faces: shrink r so the overshooting component lands on the face.
// FLT_MIN acts as a small tolerance on the comparison.
if (r.x > rmax.x + FLT_MIN) {
r *= (rmax.x / r.x);
}
if (r.y > rmax.y + FLT_MIN) {
r *= (rmax.y / r.y);
}
if (r.z > rmax.z + FLT_MIN) {
r *= (rmax.z / r.z);
}

// Lower faces: same rescaling against the minimum corner.
if (r.x < rmin.x - FLT_MIN) {
r *= (rmin.x / r.x);
}
if (r.y < rmin.y - FLT_MIN) {
r *= (rmin.y / r.y);
}
if (r.z < rmin.z - FLT_MIN) {
r *= (rmin.z / r.z);
}

return p + r;
}

// Clip history to the neighbourhood of the current sample
vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history, vec2 velocity_closest) {
vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history) {
// Sample a 3x3 neighbourhood
vec3 s1 = load_color(group_pos + kOffsets3x3[0]);
vec3 s2 = load_color(group_pos + kOffsets3x3[1]);
Expand All @@ -273,42 +205,46 @@ vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history, vec2 velocity_closest
vec3 s8 = load_color(group_pos + kOffsets3x3[7]);
vec3 s9 = load_color(group_pos + kOffsets3x3[8]);

// Compute min and max (with an adaptive box size, which greatly reduces ghosting)
vec3 color_avg = (s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9) * RPC_9;
vec3 color_avg2 = ((s1 * s1) + (s2 * s2) + (s3 * s3) + (s4 * s4) + (s5 * s5) + (s6 * s6) + (s7 * s7) + (s8 * s8) + (s9 * s9)) * RPC_9;
// Use variance clipping as described in https://developer.download.nvidia.com/gameworks/events/GDC2016/msalvi_temporal_supersampling.pdf
float box_size = mix(0.0f, params.variance_dynamic, smoothstep(0.02f, 0.0f, length(velocity_closest)));
vec3 dev = sqrt(abs(color_avg2 - (color_avg * color_avg))) * box_size;
vec3 color_min = color_avg - dev;
vec3 color_max = color_avg + dev;
vec3 color_min = min(s1, min(s2, min(s3, min(s4, min(s5, min(s6, min(s7, min(s8, s9))))))));
vec3 color_max = max(s1, max(s2, max(s3, max(s4, max(s5, max(s6, max(s7, max(s8, s9))))))));

// Variance clipping
vec3 color = clip_aabb(color_min, color_max, clamp(color_avg, color_min, color_max), color_history);
vec3 color = clamp(color_history, color_min, color_max);

// Clamp to prevent NaNs
color = clamp(color, FLT_MIN, FLT_MAX);

return color;
}

// Quickly converge when rendering only the background to avoid darkening high frequency background detail, like stars.
// The nine depths of the 3x3 neighbourhood around `pos_group` are summed; only when that
// sum is exactly zero is `resolve` replaced by a history/input blend whose weight is at
// least 0.5. In every other case `resolve` is returned unchanged.
vec3 background_detection(uvec2 pos_group, vec3 resolve, vec3 color_history, vec3 color_input, float blend_factor) {
	// Accumulate in the same order as the offsets are declared.
	float depth_sum = load_depth(pos_group + kOffsets3x3[0]);
	for (int i = 1; i < 9; i++) {
		depth_sum += load_depth(pos_group + kOffsets3x3[i]);
	}

	// Any non-zero (or NaN) sum means this is not treated as background.
	if (depth_sum != 0.0) {
		return resolve;
	}

	float background_weight = max(0.5, blend_factor);
	vec3 blended = mix(color_history, color_input, background_weight);
	return clamp(blended, FLT_MIN, FLT_MAX);
}

/*------------------------------------------------------------------------------
TAA
------------------------------------------------------------------------------*/

const vec3 lumCoeff = vec3(0.299f, 0.587f, 0.114f);

// Weighted sum of the colour channels using lumCoeff, floored at 0.0001
// so the result is never zero or negative.
float luminance(vec3 color) {
	float weighted = dot(color, lumCoeff);
	return max(weighted, 0.0001f);
}

// This is "velocity disocclusion" as described by https://www.elopezr.com/temporal-aa-and-the-quest-for-the-holy-trail/.
// We use texel space, so our scale and threshold differ.
float get_factor_disocclusion(vec2 uv_reprojected, vec2 velocity) {
vec2 velocity_previous = imageLoad(last_velocity_buffer, ivec2(uv_reprojected * params.resolution)).xy;
vec2 velocity_texels = velocity * params.resolution;
vec2 prev_velocity_texels = velocity_previous * params.resolution;
float disocclusion = length(prev_velocity_texels - velocity_texels) - params.disocclusion_threshold;
return clamp(disocclusion * DISOCCLUSION_SCALE, 0.0, 1.0);
float get_factor_disocclusion(vec2 uv, vec2 uv_reprojected) {
vec2 velocity_current = imageLoad(velocity_buffer, ivec2(uv * params.resolution)).xy * params.resolution;
vec2 velocity_previous = imageLoad(velocity_buffer, ivec2(uv_reprojected * params.resolution)).xy * params.resolution;
float disocclusion = length(velocity_current - velocity_previous);
return clamp(disocclusion, 0.0, 1.0);
}

vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_screen, vec2 uv, sampler2D tex_history) {
Expand All @@ -324,45 +260,25 @@ vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_
// Get history color (catmull-rom reduces a lot of the blurring that you get under motion)
vec3 color_history = sample_catmull_rom_9(tex_history, uv_reprojected, params.resolution).rgb;

// Clip history to the neighbourhood of the current sample (fixes a lot of the ghosting).
vec2 velocity_closest = vec2(0.0); // This is best done by using the velocity with the closest depth.
get_closest_pixel_velocity_3x3(pos_group, pos_group_top_left, velocity_closest);
color_history = clip_history_3x3(pos_group, color_history, velocity_closest);

// Compute blend factor
float blend_factor = RPC_16; // We want to be able to accumulate as many jitter samples as we generated, that is, 16.
{
// If re-projected UV is out of screen, converge to current color immediately.
float factor_screen = any(lessThan(uv_reprojected, vec2(0.0))) || any(greaterThan(uv_reprojected, vec2(1.0))) ? 1.0 : 0.0;

// Increase blend factor when there is disocclusion (fixes a lot of the remaining ghosting).
float factor_disocclusion = get_factor_disocclusion(uv_reprojected, velocity);
float factor_disocclusion = get_factor_disocclusion(uv, uv_reprojected);

// Add to the blend factor
blend_factor = clamp(blend_factor + factor_screen + factor_disocclusion, 0.0, 1.0);
}

// Resolve
vec3 color_resolved = vec3(0.0);
{
// Tonemap
color_history = reinhard(color_history);
color_input = reinhard(color_input);

// Reduce flickering
float lum_color = luminance(color_input);
float lum_history = luminance(color_history);
float diff = abs(lum_color - lum_history) / max(lum_color, max(lum_history, 1.001));
diff = 1.0 - diff;
diff = diff * diff;
blend_factor = mix(0.0, blend_factor, diff);

// Lerp/blend
color_resolved = mix(color_history, color_input, blend_factor);

// Inverse tonemap
color_resolved = reinhard_inverse(color_resolved);
}
vec3 color_resolved = clamp(mix(color_history, color_input, blend_factor), FLT_MIN, FLT_MAX);

color_resolved = background_detection(pos_group, color_resolved, color_history, color_input, blend_factor);

color_resolved = clip_history_3x3(pos_group, color_resolved);

return color_resolved;
}
Expand Down