godotengine · mrjustaguy · Nov 22, 2025
@@ -47,7 +47,7 @@ TAA::~TAA() {
 	taa_shader.version_free(shader_version);
 }
 
-void TAA::resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_prev_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far) {
+void TAA::resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far) {
 	UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
 	ERR_FAIL_NULL(uniform_set_cache);
 	MaterialStorage *material_storage = MaterialStorage::get_singleton();
@@ -76,11 +76,10 @@ void TAA::resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_pr
 	RD::Uniform u_frame_source(RD::UNIFORM_TYPE_IMAGE, 0, { p_frame });
 	RD::Uniform u_depth(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 1, { default_sampler, p_depth });
 	RD::Uniform u_velocity(RD::UNIFORM_TYPE_IMAGE, 2, { p_velocity });
-	RD::Uniform u_prev_velocity(RD::UNIFORM_TYPE_IMAGE, 3, { p_prev_velocity });
-	RD::Uniform u_history(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 4, { default_sampler, p_history });
-	RD::Uniform u_frame_dest(RD::UNIFORM_TYPE_IMAGE, 5, { p_temp });
+	RD::Uniform u_history(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 3, { default_sampler, p_history });
+	RD::Uniform u_frame_dest(RD::UNIFORM_TYPE_IMAGE, 4, { p_temp });
 
-	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_frame_source, u_depth, u_velocity, u_prev_velocity, u_history, u_frame_dest), 0);
+	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_frame_source, u_depth, u_velocity, u_history, u_frame_dest), 0);
 	RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(TAAResolvePushConstant));
 	RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_resolution.width, p_resolution.height, 1);
 	RD::get_singleton()->compute_list_end();
@@ -91,7 +90,6 @@ void TAA::process(Ref<RenderSceneBuffersRD> p_render_buffers, RD::DataFormat p_f
 
 	uint32_t view_count = p_render_buffers->get_view_count();
 	Size2i internal_size = p_render_buffers->get_internal_size();
-	Size2i target_size = p_render_buffers->get_target_size();
 
 	bool just_allocated = false;
 	if (!p_render_buffers->has_texture(SNAME("taa"), SNAME("history"))) {
@@ -100,8 +98,6 @@ void TAA::process(Ref<RenderSceneBuffersRD> p_render_buffers, RD::DataFormat p_f
 		p_render_buffers->create_texture(SNAME("taa"), SNAME("history"), p_format, usage_bits);
 		p_render_buffers->create_texture(SNAME("taa"), SNAME("temp"), p_format, usage_bits);
 
-		p_render_buffers->create_texture(SNAME("taa"), SNAME("prev_velocity"), RD::DATA_FORMAT_R16G16_SFLOAT, usage_bits);
-
 		just_allocated = true;
 	}
 
@@ -112,17 +108,15 @@ void TAA::process(Ref<RenderSceneBuffersRD> p_render_buffers, RD::DataFormat p_f
 		RID internal_texture = p_render_buffers->get_internal_texture(v);
 		RID velocity_buffer = p_render_buffers->get_velocity_buffer(false, v);
 		RID taa_history = p_render_buffers->get_texture_slice(SNAME("taa"), SNAME("history"), v, 0);
-		RID taa_prev_velocity = p_render_buffers->get_texture_slice(SNAME("taa"), SNAME("prev_velocity"), v, 0);
 
 		if (!just_allocated) {
 			RID depth_texture = p_render_buffers->get_depth_texture(v);
 			RID taa_temp = p_render_buffers->get_texture_slice(SNAME("taa"), SNAME("temp"), v, 0);
-			resolve(internal_texture, taa_temp, depth_texture, velocity_buffer, taa_prev_velocity, taa_history, Size2(internal_size.x, internal_size.y), p_z_near, p_z_far);
+			resolve(internal_texture, taa_temp, depth_texture, velocity_buffer, taa_history, Size2(internal_size.x, internal_size.y), p_z_near, p_z_far);
 			copy_effects->copy_to_rect(taa_temp, internal_texture, Rect2(0, 0, internal_size.x, internal_size.y));
 		}
 
 		copy_effects->copy_to_rect(internal_texture, taa_history, Rect2(0, 0, internal_size.x, internal_size.y));
-		copy_effects->copy_to_rect(velocity_buffer, taa_prev_velocity, Rect2(0, 0, target_size.x, target_size.y));
 	}
 
 	RD::get_singleton()->draw_command_end_label();

@@ -54,7 +54,7 @@ class TAA {
 	RID shader_version;
 	RID pipeline;
 
-	void resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_prev_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far);
+	void resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far);
 };
 
 } // namespace RendererRD
@@ -19,6 +19,7 @@
 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////////////////////////////////////////////
 // File changes (yyyy-mm-dd)
+// 2025-12-02: Rene Prašnikar: Changed history clipping, changed disocclusion logic, removed anti-flicker algorithm, removed tonemapping step, added basic background handling
 // 2025-11-05: Jakub Brzyski: Added dynamic variance, base variance value adjusted to reduce ghosting
 // 2022-05-06: Panos Karabelas: first commit
 // 2020-12-05: Joan Fons: convert to Vulkan and Godot
@@ -46,9 +47,8 @@ layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = 1) i
 layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D color_buffer;
 layout(set = 0, binding = 1) uniform sampler2D depth_buffer;
 layout(rg16f, set = 0, binding = 2) uniform restrict readonly image2D velocity_buffer;
-layout(rg16f, set = 0, binding = 3) uniform restrict readonly image2D last_velocity_buffer;
-layout(set = 0, binding = 4) uniform sampler2D history_buffer;
-layout(rgba16f, set = 0, binding = 5) uniform restrict writeonly image2D output_buffer;
+layout(set = 0, binding = 3) uniform sampler2D history_buffer;
+layout(rgba16f, set = 0, binding = 4) uniform restrict writeonly image2D output_buffer;
 
 layout(push_constant, std430) uniform Params {
 	vec2 resolution;
@@ -78,13 +78,6 @@ const int kGroupSize = GROUP_SIZE;
 const int kTileDimension = kGroupSize + kBorderSize * 2;
 const int kTileDimension2 = kTileDimension * kTileDimension;
 
-vec3 reinhard(vec3 hdr) {
-	return hdr / (hdr + 1.0);
-}
-vec3 reinhard_inverse(vec3 sdr) {
-	return sdr / (1.0 - sdr);
-}
-
 float get_depth(ivec2 thread_id) {
 	return texelFetch(depth_buffer, thread_id, 0).r;
 }
@@ -138,38 +131,6 @@ void populate_group_shared_memory(uvec2 group_id, uint group_index) {
 	barrier();
 }
 
-/*------------------------------------------------------------------------------
-								VELOCITY
-------------------------------------------------------------------------------*/
-
-void depth_test_min(uvec2 pos, inout float min_depth, inout uvec2 min_pos) {
-	float depth = load_depth(pos);
-
-	if (depth < min_depth) {
-		min_depth = depth;
-		min_pos = pos;
-	}
-}
-
-// Returns velocity with closest depth (3x3 neighborhood)
-void get_closest_pixel_velocity_3x3(in uvec2 group_pos, uvec2 group_top_left, out vec2 velocity) {
-	float min_depth = 1.0;
-	uvec2 min_pos = group_pos;
-
-	depth_test_min(group_pos + kOffsets3x3[0], min_depth, min_pos);
-	depth_test_min(group_pos + kOffsets3x3[1], min_depth, min_pos);
-	depth_test_min(group_pos + kOffsets3x3[2], min_depth, min_pos);
-	depth_test_min(group_pos + kOffsets3x3[3], min_depth, min_pos);
-	depth_test_min(group_pos + kOffsets3x3[4], min_depth, min_pos);
-	depth_test_min(group_pos + kOffsets3x3[5], min_depth, min_pos);
-	depth_test_min(group_pos + kOffsets3x3[6], min_depth, min_pos);
-	depth_test_min(group_pos + kOffsets3x3[7], min_depth, min_pos);
-	depth_test_min(group_pos + kOffsets3x3[8], min_depth, min_pos);
-
-	// Velocity out
-	velocity = imageLoad(velocity_buffer, ivec2(group_top_left + min_pos)).xy;
-}
-
 /*------------------------------------------------------------------------------
 							  HISTORY SAMPLING
 ------------------------------------------------------------------------------*/
@@ -231,37 +192,8 @@ vec3 sample_catmull_rom_9(sampler2D stex, vec2 uv, vec2 resolution) {
 							  HISTORY CLIPPING
 ------------------------------------------------------------------------------*/
 
-// Based on "Temporal Reprojection Anti-Aliasing" - https://github.com/playdeadgames/temporal
-vec3 clip_aabb(vec3 aabb_min, vec3 aabb_max, vec3 p, vec3 q) {
-	vec3 r = q - p;
-	vec3 rmax = (aabb_max - p.xyz);
-	vec3 rmin = (aabb_min - p.xyz);
-
-	if (r.x > rmax.x + FLT_MIN) {
-		r *= (rmax.x / r.x);
-	}
-	if (r.y > rmax.y + FLT_MIN) {
-		r *= (rmax.y / r.y);
-	}
-	if (r.z > rmax.z + FLT_MIN) {
-		r *= (rmax.z / r.z);
-	}
-
-	if (r.x < rmin.x - FLT_MIN) {
-		r *= (rmin.x / r.x);
-	}
-	if (r.y < rmin.y - FLT_MIN) {
-		r *= (rmin.y / r.y);
-	}
-	if (r.z < rmin.z - FLT_MIN) {
-		r *= (rmin.z / r.z);
-	}
-
-	return p + r;
-}
-
 // Clip history to the neighbourhood of the current sample
-vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history, vec2 velocity_closest) {
+vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history) {
 	// Sample a 3x3 neighbourhood
 	vec3 s1 = load_color(group_pos + kOffsets3x3[0]);
 	vec3 s2 = load_color(group_pos + kOffsets3x3[1]);
@@ -273,42 +205,46 @@ vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history, vec2 velocity_closest
 	vec3 s8 = load_color(group_pos + kOffsets3x3[7]);
 	vec3 s9 = load_color(group_pos + kOffsets3x3[8]);
 
-	// Compute min and max (with an adaptive box size, which greatly reduces ghosting)
-	vec3 color_avg = (s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9) * RPC_9;
-	vec3 color_avg2 = ((s1 * s1) + (s2 * s2) + (s3 * s3) + (s4 * s4) + (s5 * s5) + (s6 * s6) + (s7 * s7) + (s8 * s8) + (s9 * s9)) * RPC_9;
-	// Use variance clipping as described in https://developer.download.nvidia.com/gameworks/events/GDC2016/msalvi_temporal_supersampling.pdf
-	float box_size = mix(0.0f, params.variance_dynamic, smoothstep(0.02f, 0.0f, length(velocity_closest)));
-	vec3 dev = sqrt(abs(color_avg2 - (color_avg * color_avg))) * box_size;
-	vec3 color_min = color_avg - dev;
-	vec3 color_max = color_avg + dev;
+	vec3 color_min = min(s1, min(s2, min(s3, min(s4, min(s5, min(s6, min(s7, min(s8, s9))))))));
+	vec3 color_max = max(s1, max(s2, max(s3, max(s4, max(s5, max(s6, max(s7, max(s8, s9))))))));
 
-	// Variance clipping
-	vec3 color = clip_aabb(color_min, color_max, clamp(color_avg, color_min, color_max), color_history);
+	vec3 color = clamp(color_history, color_min, color_max);
 
 	// Clamp to prevent NaNs
 	color = clamp(color, FLT_MIN, FLT_MAX);
 
 	return color;
 }
 
+// Quickly converge when rendering only the background to avoid darkening high frequency background detail, like stars. Returns `resolve` unchanged unless the nine depth samples of the 3x3 neighbourhood sum to zero, in which case a history/input blend weighted at least 0.5 towards the input is returned instead.
+vec3 background_detection(uvec2 pos_group, vec3 resolve, vec3 color_history, vec3 color_input, float blend_factor) {
+	float d1 = load_depth(pos_group + kOffsets3x3[0]); // Depth of each of the nine samples in the 3x3 neighbourhood around pos_group.
+	float d2 = load_depth(pos_group + kOffsets3x3[1]);
+	float d3 = load_depth(pos_group + kOffsets3x3[2]);
+	float d4 = load_depth(pos_group + kOffsets3x3[3]);
+	float d5 = load_depth(pos_group + kOffsets3x3[4]);
+	float d6 = load_depth(pos_group + kOffsets3x3[5]);
+	float d7 = load_depth(pos_group + kOffsets3x3[6]);
+	float d8 = load_depth(pos_group + kOffsets3x3[7]);
+	float d9 = load_depth(pos_group + kOffsets3x3[8]);
+	float depth = d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8 + d9; // Sum of all nine depth samples.
+	if (depth == 0) { // NOTE(review): presumably a depth of zero marks the background and depths are never negative, so a zero sum means every sample is background - confirm.
+		resolve = clamp(mix(color_history, color_input, max(0.5, blend_factor)), FLT_MIN, FLT_MAX); // Weight the current input by at least 0.5, then clamp to [FLT_MIN, FLT_MAX].
+	}
+	return resolve;
+}
+
 /*------------------------------------------------------------------------------
 									TAA
 ------------------------------------------------------------------------------*/
 
-const vec3 lumCoeff = vec3(0.299f, 0.587f, 0.114f);
-
-float luminance(vec3 color) {
-	return max(dot(color, lumCoeff), 0.0001f);
-}
-
 // This is "velocity disocclusion" as described by https://www.elopezr.com/temporal-aa-and-the-quest-for-the-holy-trail/.
 // We use texel space, so our scale and threshold differ.
-float get_factor_disocclusion(vec2 uv_reprojected, vec2 velocity) {
-	vec2 velocity_previous = imageLoad(last_velocity_buffer, ivec2(uv_reprojected * params.resolution)).xy;
-	vec2 velocity_texels = velocity * params.resolution;
-	vec2 prev_velocity_texels = velocity_previous * params.resolution;
-	float disocclusion = length(prev_velocity_texels - velocity_texels) - params.disocclusion_threshold;
-	return clamp(disocclusion * DISOCCLUSION_SCALE, 0.0, 1.0);
+float get_factor_disocclusion(vec2 uv, vec2 uv_reprojected) { // Returns a factor in [0, 1] from how much the velocity at uv differs from the velocity at uv_reprojected.
+	vec2 velocity_current = imageLoad(velocity_buffer, ivec2(uv * params.resolution)).xy * params.resolution; // Velocity at uv, scaled by params.resolution.
+	vec2 velocity_previous = imageLoad(velocity_buffer, ivec2(uv_reprojected * params.resolution)).xy * params.resolution; // Read from the same velocity_buffer, at the reprojected position. NOTE(review): uv_reprojected can be off-screen (the caller checks this for factor_screen); confirm an out-of-bounds imageLoad is safe here.
+	float disocclusion = length(velocity_current - velocity_previous); // Magnitude of the difference between the two scaled velocities.
+	return clamp(disocclusion, 0.0, 1.0); // Differences of 1.0 or more saturate to 1.0.
 }
 
 vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_screen, vec2 uv, sampler2D tex_history) {
@@ -324,45 +260,25 @@ vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_
 	// Get history color (catmull-rom reduces a lot of the blurring that you get under motion)
 	vec3 color_history = sample_catmull_rom_9(tex_history, uv_reprojected, params.resolution).rgb;
 
-	// Clip history to the neighbourhood of the current sample (fixes a lot of the ghosting).
-	vec2 velocity_closest = vec2(0.0); // This is best done by using the velocity with the closest depth.
-	get_closest_pixel_velocity_3x3(pos_group, pos_group_top_left, velocity_closest);
-	color_history = clip_history_3x3(pos_group, color_history, velocity_closest);
-
 	// Compute blend factor
 	float blend_factor = RPC_16; // We want to be able to accumulate as many jitter samples as we generated, that is, 16.
 	{
 		// If re-projected UV is out of screen, converge to current color immediately.
 		float factor_screen = any(lessThan(uv_reprojected, vec2(0.0))) || any(greaterThan(uv_reprojected, vec2(1.0))) ? 1.0 : 0.0;
 
 		// Increase blend factor when there is disocclusion (fixes a lot of the remaining ghosting).
-		float factor_disocclusion = get_factor_disocclusion(uv_reprojected, velocity);
+		float factor_disocclusion = get_factor_disocclusion(uv, uv_reprojected);
 
 		// Add to the blend factor
 		blend_factor = clamp(blend_factor + factor_screen + factor_disocclusion, 0.0, 1.0);
 	}
 
 	// Resolve
-	vec3 color_resolved = vec3(0.0);
-	{
-		// Tonemap
-		color_history = reinhard(color_history);
-		color_input = reinhard(color_input);
-
-		// Reduce flickering
-		float lum_color = luminance(color_input);
-		float lum_history = luminance(color_history);
-		float diff = abs(lum_color - lum_history) / max(lum_color, max(lum_history, 1.001));
-		diff = 1.0 - diff;
-		diff = diff * diff;
-		blend_factor = mix(0.0, blend_factor, diff);
-
-		// Lerp/blend
-		color_resolved = mix(color_history, color_input, blend_factor);
-
-		// Inverse tonemap
-		color_resolved = reinhard_inverse(color_resolved);
-	}
+	vec3 color_resolved = clamp(mix(color_history, color_input, blend_factor), FLT_MIN, FLT_MAX);
+
+	color_resolved = background_detection(pos_group, color_resolved, color_history, color_input, blend_factor);
+
+	color_resolved = clip_history_3x3(pos_group, color_resolved);
 
 	return color_resolved;
 }