diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml
index 1c7525b3afa4..23e03043ec60 100644
--- a/doc/classes/ProjectSettings.xml
+++ b/doc/classes/ProjectSettings.xml
@@ -3362,6 +3362,9 @@
A larger number is more efficient up to a limit, after that it will only waste RAM (maximum efficiency is achieved when there is no more than 1 pool per frame). A small number could end up with one pool per descriptor, which negatively impacts performance.
[b]Note:[/b] Changing this property requires a restart to take effect.
+
+ Determines whether to automatically generate a reactive mask for FSR 2/3. Setting this to [code]true[/code] will reduce flickering when upscaling transparent objects, at a cost of one more rendering pass.
+
Determines how sharp the upscaled image will be when using the FSR upscaling mode. Sharpness halves with every whole number. Values go from 0.0 (sharpest) to 2.0. Values above 2.0 won't make a visible difference.
diff --git a/doc/classes/RenderSceneBuffersConfiguration.xml b/doc/classes/RenderSceneBuffersConfiguration.xml
index b5c37f3288f9..37ee668eef20 100644
--- a/doc/classes/RenderSceneBuffersConfiguration.xml
+++ b/doc/classes/RenderSceneBuffersConfiguration.xml
@@ -12,6 +12,9 @@
Level of the anisotropic filter.
+
+ If [code]true[/code], automatically generate a reactive mask for FSR 2/3.
+
FSR Sharpness applicable if FSR upscaling is used.
diff --git a/doc/classes/RenderSceneBuffersExtension.xml b/doc/classes/RenderSceneBuffersExtension.xml
index fa3efbcaa0ef..20e799e57c2b 100644
--- a/doc/classes/RenderSceneBuffersExtension.xml
+++ b/doc/classes/RenderSceneBuffersExtension.xml
@@ -23,6 +23,13 @@
Implement this in GDExtension to change the anisotropic filtering level.
+
+
+
+
+ Implement this in GDExtension to record a new FSR auto generate reactive value.
+
+
diff --git a/doc/classes/RenderSceneBuffersRD.xml b/doc/classes/RenderSceneBuffersRD.xml
index 6a9445a25edf..6aee5ad0ec83 100644
--- a/doc/classes/RenderSceneBuffersRD.xml
+++ b/doc/classes/RenderSceneBuffersRD.xml
@@ -90,6 +90,12 @@
If [param msaa] is [code]true[/code] and MSAA is enabled, this returns the MSAA variant of the buffer.
+
+
+
+ Returns [code]true[/code] if FSR automatically generates a reactive mask during upscaling, [code]false[/code] otherwise.
+
+
diff --git a/doc/classes/RenderingServer.xml b/doc/classes/RenderingServer.xml
index e1f7a53302b8..1ab0f567bfc9 100644
--- a/doc/classes/RenderingServer.xml
+++ b/doc/classes/RenderingServer.xml
@@ -4098,6 +4098,14 @@
Sets the viewport's environment mode which allows enabling or disabling rendering of 3D environment over 2D canvas. When disabled, 2D will not be affected by the environment. When enabled, 2D will be affected by the environment if the environment background mode is [constant ENV_BG_CANVAS]. The default behavior is to inherit the setting from the viewport's parent. If the topmost parent is also set to [constant VIEWPORT_ENVIRONMENT_INHERIT], then the behavior will be the same as if it was set to [constant VIEWPORT_ENVIRONMENT_ENABLED].
+
+
+
+
+
+ Determines whether to automatically generate a reactive mask for FSR 2/3. Setting this to [code]true[/code] will reduce flickering when upscaling transparent objects, at a cost of one more rendering pass.
+
+
@@ -5100,21 +5108,24 @@
Use bilinear scaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in undersampling while values greater than [code]1.0[/code] will result in supersampling. A value of [code]1.0[/code] disables scaling.
-
- Use AMD FidelityFX Super Resolution 1.0 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] disables scaling.
+
+ Use AMD FidelityFX Super Resolution 1.2 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR1. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] disables scaling.
- Use AMD FidelityFX Super Resolution 2.2 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR2. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use FSR2 at native resolution as a TAA solution.
+ Use AMD FidelityFX Super Resolution 2.3 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR2. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use FSR2 at native resolution as a TAA solution.
+
+
+ Use AMD FidelityFX Super Resolution 3.1 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR3. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use FSR3 at native resolution as a TAA solution.
-
+
Use MetalFX spatial upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using MetalFX. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] disables scaling.
[b]Note:[/b] Only supported when the Metal rendering driver is in use, which limits this scaling mode to macOS and iOS.
-
+
Use MetalFX temporal upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using MetalFX. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use MetalFX at native resolution as a TAA solution.
[b]Note:[/b] Only supported when the Metal rendering driver is in use, which limits this scaling mode to macOS and iOS.
-
+
Represents the size of the [enum ViewportScaling3DMode] enum.
diff --git a/doc/classes/Viewport.xml b/doc/classes/Viewport.xml
index 7832cc844c86..f90777b4504b 100644
--- a/doc/classes/Viewport.xml
+++ b/doc/classes/Viewport.xml
@@ -343,6 +343,10 @@
Disable 3D rendering (but keep 2D rendering).
+
+ Determines whether FSR 2/3 runs an additional pass to generate a reactive mask. Enabling this will increase GPU time, but provide better upscaling results for transparent objects.
+ To control this property on the root viewport, set the [member ProjectSettings.rendering/scaling_3d/fsr_auto_generate_reactive] project setting.
+
Determines how sharp the upscaled image will be when using the FSR upscaling mode. Sharpness halves with every whole number. Values go from 0.0 (sharpest) to 2.0. Values above 2.0 won't make a visible difference.
To control this property on the root viewport, set the [member ProjectSettings.rendering/scaling_3d/fsr_sharpness] project setting.
@@ -559,21 +563,23 @@
Use AMD FidelityFX Super Resolution 2.2 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR2. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use FSR2 at native resolution as a TAA solution.
-
+
+
+
Use the [url=https://developer.apple.com/documentation/metalfx/mtlfxspatialscaler#overview]MetalFX spatial upscaler[/url] for the viewport's 3D buffer.
The amount of scaling can be set using [member scaling_3d_scale].
Values less than [code]1.0[/code] will result in the viewport being upscaled using MetalFX. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] disables scaling.
More information: [url=https://developer.apple.com/documentation/metalfx]MetalFX[/url].
[b]Note:[/b] Only supported when the Metal rendering driver is in use, which limits this scaling mode to macOS and iOS.
-
+
Use the [url=https://developer.apple.com/documentation/metalfx/mtlfxtemporalscaler#overview]MetalFX temporal upscaler[/url] for the viewport's 3D buffer.
The amount of scaling can be set using [member scaling_3d_scale]. To determine the minimum input scale, use the [method RenderingDevice.limit_get] method with [constant RenderingDevice.LIMIT_METALFX_TEMPORAL_SCALER_MIN_SCALE].
Values less than [code]1.0[/code] will result in the viewport being upscaled using MetalFX. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use MetalFX at native resolution as a TAA solution.
More information: [url=https://developer.apple.com/documentation/metalfx]MetalFX[/url].
[b]Note:[/b] Only supported when the Metal rendering driver is in use, which limits this scaling mode to macOS and iOS.
-
+
Represents the size of the [enum Scaling3DMode] enum.
diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.h b/drivers/gles3/storage/render_scene_buffers_gles3.h
index f98454f3504c..b8bda7953cf4 100644
--- a/drivers/gles3/storage/render_scene_buffers_gles3.h
+++ b/drivers/gles3/storage/render_scene_buffers_gles3.h
@@ -104,6 +104,7 @@ class RenderSceneBuffersGLES3 : public RenderSceneBuffers {
virtual void set_anisotropic_filtering_level(RS::ViewportAnisotropicFiltering p_anisotropic_filtering_level) override {}
virtual void set_fsr_sharpness(float p_fsr_sharpness) override {}
+ virtual void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) override {}
virtual void set_texture_mipmap_bias(float p_texture_mipmap_bias) override {}
virtual void set_use_debanding(bool p_use_debanding) override {}
void set_apply_color_adjustments_in_post(bool p_apply_in_post);
diff --git a/editor/scene/3d/node_3d_editor_plugin.cpp b/editor/scene/3d/node_3d_editor_plugin.cpp
index 98d6b9f9503e..3c049df0f004 100644
--- a/editor/scene/3d/node_3d_editor_plugin.cpp
+++ b/editor/scene/3d/node_3d_editor_plugin.cpp
@@ -3164,6 +3164,9 @@ void Node3DEditorViewport::_project_settings_changed() {
const float fsr_sharpness = GLOBAL_GET("rendering/scaling_3d/fsr_sharpness");
viewport->set_fsr_sharpness(fsr_sharpness);
+ const bool fsr_auto_generate_reactive = GLOBAL_GET("rendering/scaling_3d/fsr_auto_generate_reactive");
+ viewport->set_fsr_auto_generate_reactive(fsr_auto_generate_reactive);
+
const float texture_mipmap_bias = GLOBAL_GET("rendering/textures/default_filters/texture_mipmap_bias");
viewport->set_texture_mipmap_bias(texture_mipmap_bias);
diff --git a/scene/main/viewport.cpp b/scene/main/viewport.cpp
index c61932035ec9..831b6e43bccb 100644
--- a/scene/main/viewport.cpp
+++ b/scene/main/viewport.cpp
@@ -4890,6 +4890,21 @@ float Viewport::get_fsr_sharpness() const {
return fsr_sharpness;
}
+void Viewport::set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) { // Stores the FSR auto-generate-reactive toggle and forwards it to the RenderingServer for this viewport.
+ ERR_MAIN_THREAD_GUARD;
+ if (fsr_auto_generate_reactive == p_fsr_auto_generate_reactive) {
+ return; // Value unchanged: skip the RenderingServer call.
+ }
+
+ fsr_auto_generate_reactive = p_fsr_auto_generate_reactive;
+ RS::get_singleton()->viewport_set_fsr_auto_generate_reactive(viewport, p_fsr_auto_generate_reactive);
+}
+
+bool Viewport::get_fsr_auto_generate_reactive() const { // Returns the toggle value cached by set_fsr_auto_generate_reactive().
+ ERR_READ_THREAD_GUARD_V(false); // Fallback is a bool literal to match the return type (previously the integer 0).
+ return fsr_auto_generate_reactive;
+}
+
void Viewport::set_texture_mipmap_bias(float p_texture_mipmap_bias) {
ERR_MAIN_THREAD_GUARD;
if (texture_mipmap_bias == p_texture_mipmap_bias) {
@@ -5123,6 +5138,9 @@ void Viewport::_bind_methods() {
ClassDB::bind_method(D_METHOD("set_fsr_sharpness", "fsr_sharpness"), &Viewport::set_fsr_sharpness);
ClassDB::bind_method(D_METHOD("get_fsr_sharpness"), &Viewport::get_fsr_sharpness);
+ ClassDB::bind_method(D_METHOD("set_fsr_auto_generate_reactive", "fsr_auto_generate_reactive"), &Viewport::set_fsr_auto_generate_reactive);
+ ClassDB::bind_method(D_METHOD("get_fsr_auto_generate_reactive"), &Viewport::get_fsr_auto_generate_reactive);
+
ClassDB::bind_method(D_METHOD("set_texture_mipmap_bias", "texture_mipmap_bias"), &Viewport::set_texture_mipmap_bias);
ClassDB::bind_method(D_METHOD("get_texture_mipmap_bias"), &Viewport::get_texture_mipmap_bias);
@@ -5163,11 +5181,12 @@ void Viewport::_bind_methods() {
#ifndef _3D_DISABLED
ADD_GROUP("Scaling 3D", "");
- ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.0 (Fast),FSR 2.2 (Slow),MetalFX (Spatial),MetalFX (Temporal)"), "set_scaling_3d_mode", "get_scaling_3d_mode");
+ ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.2 (Fast),FSR 2.3 (Slow),FSR 3.1 (Slow),MetalFX (Spatial),MetalFX (Temporal)"), "set_scaling_3d_mode", "get_scaling_3d_mode");
ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "scaling_3d_scale", PROPERTY_HINT_RANGE, "0.25,2.0,0.01"), "set_scaling_3d_scale", "get_scaling_3d_scale");
ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "texture_mipmap_bias", PROPERTY_HINT_RANGE, "-2,2,0.001"), "set_texture_mipmap_bias", "get_texture_mipmap_bias");
ADD_PROPERTY(PropertyInfo(Variant::INT, "anisotropic_filtering_level", PROPERTY_HINT_ENUM, String::utf8("Disabled (Fastest),2× (Faster),4× (Fast),8× (Average),16x (Slow)")), "set_anisotropic_filtering_level", "get_anisotropic_filtering_level");
ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "fsr_sharpness", PROPERTY_HINT_RANGE, "0,2,0.1"), "set_fsr_sharpness", "get_fsr_sharpness");
+ ADD_PROPERTY(PropertyInfo(Variant::BOOL, "fsr_auto_generate_reactive"), "set_fsr_auto_generate_reactive", "get_fsr_auto_generate_reactive");
ADD_GROUP("Variable Rate Shading", "vrs_");
ADD_PROPERTY(PropertyInfo(Variant::INT, "vrs_mode", PROPERTY_HINT_ENUM, "Disabled,Texture,XR"), "set_vrs_mode", "get_vrs_mode");
ADD_PROPERTY(PropertyInfo(Variant::INT, "vrs_update_mode", PROPERTY_HINT_ENUM, "Disabled,Once,Always"), "set_vrs_update_mode", "get_vrs_update_mode");
@@ -5224,6 +5243,7 @@ void Viewport::_bind_methods() {
BIND_ENUM_CONSTANT(SCALING_3D_MODE_BILINEAR);
BIND_ENUM_CONSTANT(SCALING_3D_MODE_FSR);
BIND_ENUM_CONSTANT(SCALING_3D_MODE_FSR2);
+ BIND_ENUM_CONSTANT(SCALING_3D_MODE_FSR3);
BIND_ENUM_CONSTANT(SCALING_3D_MODE_METALFX_SPATIAL);
BIND_ENUM_CONSTANT(SCALING_3D_MODE_METALFX_TEMPORAL);
BIND_ENUM_CONSTANT(SCALING_3D_MODE_MAX);
@@ -5369,6 +5389,7 @@ Viewport::Viewport() {
set_scaling_3d_mode((Viewport::Scaling3DMode)(int)GLOBAL_GET("rendering/scaling_3d/mode"));
set_scaling_3d_scale(GLOBAL_GET("rendering/scaling_3d/scale"));
set_fsr_sharpness((float)GLOBAL_GET("rendering/scaling_3d/fsr_sharpness"));
+ set_fsr_auto_generate_reactive((bool)GLOBAL_GET("rendering/scaling_3d/fsr_auto_generate_reactive"));
set_texture_mipmap_bias((float)GLOBAL_GET("rendering/textures/default_filters/texture_mipmap_bias"));
set_anisotropic_filtering_level((Viewport::AnisotropicFiltering)(int)GLOBAL_GET("rendering/textures/default_filters/anisotropic_filtering_level"));
#endif // _3D_DISABLED
diff --git a/scene/main/viewport.h b/scene/main/viewport.h
index cfd99df7fcfa..833b8c825cac 100644
--- a/scene/main/viewport.h
+++ b/scene/main/viewport.h
@@ -100,6 +100,7 @@ class Viewport : public Node {
SCALING_3D_MODE_BILINEAR,
SCALING_3D_MODE_FSR,
SCALING_3D_MODE_FSR2,
+ SCALING_3D_MODE_FSR3,
SCALING_3D_MODE_METALFX_SPATIAL,
SCALING_3D_MODE_METALFX_TEMPORAL,
SCALING_3D_MODE_MAX
@@ -314,6 +315,7 @@ class Viewport : public Node {
Scaling3DMode scaling_3d_mode = SCALING_3D_MODE_BILINEAR;
float scaling_3d_scale = 1.0;
float fsr_sharpness = 0.2f;
+ bool fsr_auto_generate_reactive = false;
float texture_mipmap_bias = 0.0f;
AnisotropicFiltering anisotropic_filtering_level = ANISOTROPY_4X;
bool use_debanding = false;
@@ -590,6 +592,9 @@ class Viewport : public Node {
void set_fsr_sharpness(float p_fsr_sharpness);
float get_fsr_sharpness() const;
+ void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive);
+ bool get_fsr_auto_generate_reactive() const;
+
void set_texture_mipmap_bias(float p_texture_mipmap_bias);
float get_texture_mipmap_bias() const;
diff --git a/servers/rendering/renderer_rd/effects/SCsub b/servers/rendering/renderer_rd/effects/SCsub
index 30656a4225b7..329fcc0e32b8 100644
--- a/servers/rendering/renderer_rd/effects/SCsub
+++ b/servers/rendering/renderer_rd/effects/SCsub
@@ -11,9 +11,11 @@ env_effects = env.Clone()
thirdparty_obj = []
-thirdparty_dir = "#thirdparty/amd-fsr2/"
-thirdparty_sources = ["ffx_assert.cpp", "ffx_fsr2.cpp"]
-thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
+thirdparty_dir = "#thirdparty/amd-ffx/"
+thirdparty_sources = thirdparty_dir + "*.cpp"
+
+if env.dev_build:
+ env_effects.Append(CPPDEFINES=["FFX_DEBUG"])
env_effects.Prepend(CPPPATH=[thirdparty_dir])
@@ -69,6 +71,7 @@ env.servers_sources += thirdparty_obj
module_obj = []
env_effects.add_source_files(module_obj, "*.cpp")
+env_effects.add_source_files(module_obj, "ffx/*.cpp")
if env["metal"]:
env_effects.add_source_files(module_obj, "metal_fx.mm")
env.servers_sources += module_obj
diff --git a/servers/rendering/renderer_rd/effects/ffx/ffx_common.cpp b/servers/rendering/renderer_rd/effects/ffx/ffx_common.cpp
new file mode 100644
index 000000000000..97e8d55a91b8
--- /dev/null
+++ b/servers/rendering/renderer_rd/effects/ffx/ffx_common.cpp
@@ -0,0 +1,645 @@
+/**************************************************************************/
+/* ffx_common.cpp */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#include "ffx_common.h"
+
+#include "servers/rendering/renderer_rd/uniform_set_cache_rd.h"
+#include "servers/rendering/rendering_device_commons.h"
+#include "thirdparty/amd-ffx/ffx_fsr1.h"
+#include "thirdparty/amd-ffx/ffx_fsr2.h"
+#include "thirdparty/amd-ffx/ffx_fsr3upscaler.h"
+
+using namespace RendererRD;
+
+RD::TextureType FFXCommon::ffx_resource_type_to_rd_texture_type(FfxResourceType p_type) { // Maps an FFX 1D/2D/3D texture resource type to the matching RD texture type.
+ switch (p_type) {
+ case FFX_RESOURCE_TYPE_TEXTURE1D:
+ return RD::TEXTURE_TYPE_1D;
+ case FFX_RESOURCE_TYPE_TEXTURE2D:
+ return RD::TEXTURE_TYPE_2D;
+ case FFX_RESOURCE_TYPE_TEXTURE3D:
+ return RD::TEXTURE_TYPE_3D;
+ default: // Every other FFX resource type has no mapping here.
+#ifdef DEV_ENABLED
+ ERR_PRINT("Unknown FFX resource type.");
+#endif
+ return RD::TEXTURE_TYPE_MAX; // Sentinel result; the error above is only printed when DEV_ENABLED is defined.
+ }
+}
+
+FfxResourceType FFXCommon::rd_texture_type_to_ffx_resource_type(RD::TextureType p_type) { // Maps an RD 1D/2D/3D texture type to the matching FFX resource type.
+ switch (p_type) {
+ case RD::TEXTURE_TYPE_1D:
+ return FFX_RESOURCE_TYPE_TEXTURE1D;
+ case RD::TEXTURE_TYPE_2D:
+ return FFX_RESOURCE_TYPE_TEXTURE2D;
+ case RD::TEXTURE_TYPE_3D:
+ return FFX_RESOURCE_TYPE_TEXTURE3D;
+ default: // Every other RD texture type has no mapping here.
+#ifdef DEV_ENABLED
+ ERR_PRINT("Unknown RD texture type."); // The rejected input is an RD type, so the message names it instead of the FFX one.
+#endif
+ return FFX_RESOURCE_TYPE_BUFFER; // Fallback result; the error above is only printed when DEV_ENABLED is defined.
+ }
+}
+
+RD::DataFormat FFXCommon::ffx_surface_format_to_rd_format(FfxSurfaceFormat p_format) { // Maps an FFX surface format to the RD data format used to create the texture.
+ switch (p_format) {
+ case FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS: // Typeless variants are mapped to the same RD format as their typed counterpart below.
+ return RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
+ case FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT:
+ return RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
+ case FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT:
+ return RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
+ case FFX_SURFACE_FORMAT_R32G32_FLOAT:
+ return RD::DATA_FORMAT_R32G32_SFLOAT;
+ case FFX_SURFACE_FORMAT_R32_UINT:
+ return RD::DATA_FORMAT_R32_UINT;
+ case FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS:
+ return RD::DATA_FORMAT_R8G8B8A8_UNORM;
+ case FFX_SURFACE_FORMAT_R8G8B8A8_UNORM:
+ return RD::DATA_FORMAT_R8G8B8A8_UNORM;
+ case FFX_SURFACE_FORMAT_R11G11B10_FLOAT:
+ return RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32;
+ case FFX_SURFACE_FORMAT_R16G16_FLOAT:
+ return RD::DATA_FORMAT_R16G16_SFLOAT;
+ case FFX_SURFACE_FORMAT_R16G16_UINT:
+ return RD::DATA_FORMAT_R16G16_UINT;
+ case FFX_SURFACE_FORMAT_R16_FLOAT:
+ return RD::DATA_FORMAT_R16_SFLOAT;
+ case FFX_SURFACE_FORMAT_R16_UINT:
+ return RD::DATA_FORMAT_R16_UINT;
+ case FFX_SURFACE_FORMAT_R16_UNORM:
+ return RD::DATA_FORMAT_R16_UNORM;
+ case FFX_SURFACE_FORMAT_R16_SNORM:
+ return RD::DATA_FORMAT_R16_SNORM;
+ case FFX_SURFACE_FORMAT_R8_UNORM:
+ return RD::DATA_FORMAT_R8_UNORM;
+ case FFX_SURFACE_FORMAT_R8_UINT:
+ return RD::DATA_FORMAT_R8_UINT;
+ case FFX_SURFACE_FORMAT_R8G8_UNORM:
+ return RD::DATA_FORMAT_R8G8_UNORM;
+ case FFX_SURFACE_FORMAT_R32_FLOAT:
+ return RD::DATA_FORMAT_R32_SFLOAT;
+ default: // Every other FFX surface format has no mapping here.
+#ifdef DEV_ENABLED
+ ERR_PRINT("Unknown FFX surface format."); // The rejected input is a surface format, not a resource type.
+#endif
+ return RD::DATA_FORMAT_MAX; // Sentinel result; the error above is only printed when DEV_ENABLED is defined.
+ }
+}
+
+FfxSurfaceFormat FFXCommon::rd_format_to_ffx_surface_format(RD::DataFormat p_format) { // Maps an RD data format to the matching typed FFX surface format.
+ switch (p_format) {
+ case RD::DATA_FORMAT_R32G32B32A32_SFLOAT:
+ return FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT;
+ case RD::DATA_FORMAT_R16G16B16A16_SFLOAT:
+ return FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT;
+ case RD::DATA_FORMAT_R32G32_SFLOAT:
+ return FFX_SURFACE_FORMAT_R32G32_FLOAT;
+ case RD::DATA_FORMAT_R32_UINT:
+ return FFX_SURFACE_FORMAT_R32_UINT;
+ case RD::DATA_FORMAT_R8G8B8A8_UNORM:
+ return FFX_SURFACE_FORMAT_R8G8B8A8_UNORM;
+ case RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32:
+ return FFX_SURFACE_FORMAT_R11G11B10_FLOAT;
+ case RD::DATA_FORMAT_R16G16_SFLOAT:
+ return FFX_SURFACE_FORMAT_R16G16_FLOAT;
+ case RD::DATA_FORMAT_R16G16_UINT:
+ return FFX_SURFACE_FORMAT_R16G16_UINT;
+ case RD::DATA_FORMAT_R16_SFLOAT:
+ return FFX_SURFACE_FORMAT_R16_FLOAT;
+ case RD::DATA_FORMAT_R16_UINT:
+ return FFX_SURFACE_FORMAT_R16_UINT;
+ case RD::DATA_FORMAT_R16_UNORM:
+ return FFX_SURFACE_FORMAT_R16_UNORM;
+ case RD::DATA_FORMAT_R16_SNORM:
+ return FFX_SURFACE_FORMAT_R16_SNORM;
+ case RD::DATA_FORMAT_R8_UNORM:
+ return FFX_SURFACE_FORMAT_R8_UNORM;
+ case RD::DATA_FORMAT_R8_UINT:
+ return FFX_SURFACE_FORMAT_R8_UINT;
+ case RD::DATA_FORMAT_R8G8_UNORM:
+ return FFX_SURFACE_FORMAT_R8G8_UNORM;
+ case RD::DATA_FORMAT_R32_SFLOAT:
+ return FFX_SURFACE_FORMAT_R32_FLOAT;
+ default: // Unlike the FFX-to-RD direction, no error is printed for an unmapped format.
+ return FFX_SURFACE_FORMAT_UNKNOWN;
+ }
+}
+
+static uint32_t ffx_usage_to_rd_usage_flags(uint32_t p_flags) { // Translates FFX resource usage flags into RD texture usage bits.
+ uint32_t ret = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; // Sampling and update are always enabled, whatever p_flags holds.
+
+ if (p_flags & FFX_RESOURCE_USAGE_RENDERTARGET) {
+ ret |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
+ }
+
+ if (p_flags & FFX_RESOURCE_USAGE_UAV) { // UAV resources get storage access plus both copy directions.
+ ret |= RD::TEXTURE_USAGE_STORAGE_BIT;
+ ret |= RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT;
+ ret |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
+ }
+
+ return ret;
+}
+
+static FfxVersionNumber get_sdk_version_rd(FfxInterface *p_backend_interface) { // Reports the version assembled from the FFX_SDK_VERSION_* macros; p_backend_interface is not used.
+ return FFX_SDK_MAKE_VERSION(FFX_SDK_VERSION_MAJOR, FFX_SDK_VERSION_MINOR, FFX_SDK_VERSION_PATCH);
+}
+
+static FfxErrorCode create_backend_context_rd(FfxInterface *p_backend_interface, FfxEffect p_effect,
+ FfxEffectBindlessConfig *p_bindless_config, FfxUInt32 *p_effect_context_id) { // Initializes the scratch state (device, effect type, staging buffer, UBO ring); p_effect_context_id is not written here.
+ FFXCommon::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
+
+ if (p_bindless_config) { // A bindless config is only warned about; it is otherwise ignored.
+ WARN_PRINT_ONCE("Fidelity FX: Bindless resources are not supported in Godot.");
+ }
+
+ // Store pointer to the device common to all contexts.
+ scratch.device = p_backend_interface->device;
+ scratch.effect_type = p_effect;
+ scratch.staging_constant_buffer = (uint8_t *)memalloc(FFX_STAGING_CONSTANT_BUFFER_SIZE);
+ ERR_FAIL_NULL_V(scratch.staging_constant_buffer, FFX_ERROR_OUT_OF_MEMORY);
+
+ // Create a ring buffer of uniform buffers.
+ // FIXME: This could be optimized to be a single memory block if it was possible for RD to create views into a particular memory range of a UBO.
+ for (uint32_t i = 0; i < FFX_UBO_RING_BUFFER_SIZE; i++) {
+ scratch.ubo_ring_buffer[i] = RD::get_singleton()->uniform_buffer_create(FFX_BUFFER_SIZE);
+ ERR_FAIL_COND_V(scratch.ubo_ring_buffer[i].is_null(), FFX_ERROR_BACKEND_API_ERROR); // NOTE(review): returns without releasing the staging buffer or earlier UBOs - confirm the caller still runs destroy_backend_context_rd.
+ }
+
+ return FFX_OK;
+}
+
+static FfxErrorCode get_device_capabilities_rd(FfxInterface *p_backend_interface, FfxDeviceCapabilities *p_out_device_capabilities) { // Writes FFXCommon::get_device_capabilities() to the output; p_backend_interface is not used.
+ *p_out_device_capabilities = FFXCommon::get_device_capabilities();
+
+ return FFX_OK;
+}
+
+static FfxErrorCode destroy_backend_context_rd(FfxInterface *p_backend_interface, FfxUInt32 effect_context_id) { // Releases the staging buffer and every UBO of the ring buffer; effect_context_id is not used.
+ FFXCommon::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
+ if (scratch.staging_constant_buffer) {
+ memfree(scratch.staging_constant_buffer); // NOTE(review): the pointer is not reset afterwards - confirm this is never called twice on the same scratch.
+ }
+
+ for (uint32_t i = 0; i < FFX_UBO_RING_BUFFER_SIZE; i++) {
+ RD::get_singleton()->free_rid(scratch.ubo_ring_buffer[i]); // Freed without a validity check on the RID.
+ }
+
+ return FFX_OK;
+}
+
+static FfxErrorCode create_resource_rd(FfxInterface *p_backend_interface, const FfxCreateResourceDescription *p_create_resource_description, FfxUInt32 effect_context_id, FfxResourceInternal *p_out_resource) { // Creates an RD texture for an FFX-requested resource and returns its scratch index through p_out_resource.
+ // FSR2's base implementation won't issue a call to create a heap type that isn't just default on its own,
+ // so we can safely ignore it as RD does not expose this concept.
+ ERR_FAIL_COND_V(p_create_resource_description->heapType != FFX_HEAP_TYPE_DEFAULT, FFX_ERROR_INVALID_ARGUMENT);
+
+ RenderingDevice *rd = RD::get_singleton();
+ FFXCommon::Scratch &scratch = *reinterpret_cast<FFXCommon::Scratch *>(p_backend_interface->scratchBuffer);
+ FfxResourceDescription res_desc = p_create_resource_description->resourceDescription;
+
+ // FSR2's base implementation never requests buffer creation.
+ ERR_FAIL_COND_V(res_desc.type != FFX_RESOURCE_TYPE_TEXTURE1D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE2D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE3D, FFX_ERROR_INVALID_ARGUMENT);
+
+ if (res_desc.mipCount == 0) {
+ // Mipmap count must be derived from the resource's dimensions.
+ res_desc.mipCount = uint32_t(1 + std::floor(std::log2(MAX(MAX(res_desc.width, res_desc.height), res_desc.depth))));
+ }
+
+ Vector<PackedByteArray> initial_data; // Stays empty when the resource is requested uninitialized.
+ if (p_create_resource_description->initData.type != FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED) {
+ PackedByteArray byte_array;
+ byte_array.resize(p_create_resource_description->initData.size);
+ switch (p_create_resource_description->initData.type) {
+ case FFX_RESOURCE_INIT_DATA_TYPE_BUFFER:
+ memcpy(byte_array.ptrw(), p_create_resource_description->initData.buffer, p_create_resource_description->initData.size);
+ break;
+ case FFX_RESOURCE_INIT_DATA_TYPE_VALUE:
+ memset(byte_array.ptrw(), p_create_resource_description->initData.value, p_create_resource_description->initData.size); // 'value' is a single fill byte, so replicate it; copying 'size' bytes from its address read past the field.
+ break;
+ default:
+ ERR_PRINT("Invalid initial data type. ");
+ break;
+ }
+ initial_data.push_back(byte_array);
+ }
+
+ RD::TextureFormat texture_format;
+ texture_format.texture_type = FFXCommon::ffx_resource_type_to_rd_texture_type(res_desc.type);
+ texture_format.format = FFXCommon::ffx_surface_format_to_rd_format(res_desc.format);
+ texture_format.usage_bits = ffx_usage_to_rd_usage_flags(p_create_resource_description->resourceDescription.usage);
+ texture_format.width = res_desc.width;
+ texture_format.height = res_desc.height;
+ texture_format.depth = res_desc.depth;
+ texture_format.mipmaps = res_desc.mipCount;
+ texture_format.is_discardable = true;
+
+ RID texture = rd->texture_create(texture_format, RD::TextureView(), initial_data);
+ ERR_FAIL_COND_V(texture.is_null(), FFX_ERROR_BACKEND_API_ERROR);
+
+ rd->set_resource_name(texture, String(p_create_resource_description->name));
+
+ // Add the resource to the storage and use the internal index to reference it.
+ p_out_resource->internalIndex = scratch.resources.add(texture, false, p_create_resource_description->id, res_desc);
+
+ return FFX_OK;
+}
+
+static FfxErrorCode register_resource_rd(FfxInterface *p_backend_interface, const FfxResource *p_in_resource, FfxUInt32 effect_context_id, FfxResourceInternal *p_out_resource) { // Adds an externally provided RID to the scratch resource table; effect_context_id is not used.
+ if (p_in_resource->resource == nullptr) {
+ // Null resource case.
+ p_out_resource->internalIndex = -1; // -1 is the index the sibling callbacks treat as "no resource".
+ return FFX_OK;
+ }
+
+ FFXCommon::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
+ const RID &rid = *reinterpret_cast(p_in_resource->resource);
+ ERR_FAIL_COND_V(rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);
+
+ // Add the resource to the storage and use the internal index to reference it.
+ p_out_resource->internalIndex = scratch.resources.add(rid, true, FFXCommon::RESOURCE_ID_DYNAMIC, p_in_resource->description);
+
+ return FFX_OK;
+}
+
+static FfxErrorCode unregister_resources_rd(FfxInterface *p_backend_interface, FfxCommandList p_command_list, FfxUInt32 effect_context_id) { // Removes every entry of the dynamic resource list from the scratch table; the command list and context id are not used.
+ FFXCommon::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
+ LocalVector dynamic_list_copy = scratch.resources.dynamic_list; // Iterates over a copy - presumably remove() mutates dynamic_list; confirm against Scratch.
+ for (uint32_t i : dynamic_list_copy) {
+ scratch.resources.remove(i);
+ }
+
+ return FFX_OK;
+}
+
+static FfxResourceDescription get_resource_description_rd(FfxInterface *p_backend_interface, FfxResourceInternal p_resource) { // Returns the description stored for the resource's internal index.
+ if (p_resource.internalIndex != -1) {
+ FFXCommon::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
+ return scratch.resources.descriptions[p_resource.internalIndex];
+ } else {
+ return {}; // Index -1 (null resource) yields a value-initialized description.
+ }
+}
+
+static FfxErrorCode destroy_resource_rd(FfxInterface *p_backend_interface, FfxResourceInternal p_resource, FfxUInt32 effect_context_id) { // Frees the RID behind the internal index and removes its table entry; always returns FFX_OK.
+ if (p_resource.internalIndex != -1) { // Index -1 (null resource) is a no-op.
+ FFXCommon::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
+ if (scratch.resources.rids[p_resource.internalIndex].is_valid()) { // Both the free and the table removal are skipped when the stored RID is invalid.
+ RD::get_singleton()->free_rid(scratch.resources.rids[p_resource.internalIndex]);
+ scratch.resources.remove(p_resource.internalIndex);
+ }
+ }
+
+ return FFX_OK;
+}
+
+// FFX backend callback (installed as fpCreatePipeline): fills p_out_pipeline for pass p_pass from
+// the pass table of the Device referenced by the scratch.
+// The compute pipeline is created the first time a pass is requested and is kept in the pass
+// entry, so later calls for the same pass reuse it.
+// Returns FFX_ERROR_BACKEND_API_ERROR when the shader or pipeline RID comes back null, and
+// FFX_ERROR_OUT_OF_RANGE when a binding count exceeds the matching FFX_MAX_NUM_* limit.
+static FfxErrorCode create_pipeline_rd(FfxInterface *p_backend_interface, FfxEffect p_effect, FfxPass p_pass, uint32_t p_permutation_options, const FfxPipelineDescription *p_pipeline_description, FfxUInt32 p_effect_context_id, FfxPipelineState *p_out_pipeline) {
+ FFXCommon::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
+ FFXCommon::Device &device = *reinterpret_cast(scratch.device);
+
+ // NOTE(review): p_pass is used as an index without a range check - presumably FFX only passes valid pass ids; confirm.
+ FFXCommon::Pass &effect_pass = device.passes[p_pass];
+
+ if (effect_pass.pipeline.pipeline_rid.is_null()) {
+ // Create pipeline for the device if it hasn't been created yet.
+ effect_pass.root_signature.shader_rid = effect_pass.shader->version_get_shader(effect_pass.shader_version, effect_pass.shader_variant);
+ ERR_FAIL_COND_V(effect_pass.root_signature.shader_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR);
+
+ effect_pass.pipeline.pipeline_rid = RD::get_singleton()->compute_pipeline_create(effect_pass.root_signature.shader_rid);
+ ERR_FAIL_COND_V(effect_pass.pipeline.pipeline_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR);
+ }
+
+#ifdef DEV_ENABLED
+ // The pipeline name is only copied over in DEV_ENABLED builds.
+ memcpy(p_out_pipeline->name, p_pipeline_description->name, sizeof(p_out_pipeline->name));
+#endif
+
+ // While this is not their intended use, we use the pipeline and root signature pointers to store the
+ // RIDs to the pipeline and shader that RD needs for the compute pipeline.
+ p_out_pipeline->pipeline = reinterpret_cast(&effect_pass.pipeline);
+ p_out_pipeline->rootSignature = reinterpret_cast(&effect_pass.root_signature);
+
+ // FSR doesn't use any buffers
+ p_out_pipeline->srvBufferCount = 0;
+ p_out_pipeline->srvTextureCount = effect_pass.sampled_texture_bindings.size();
+ // The count is validated before the memcpy so the fixed-size binding array cannot be overrun.
+ ERR_FAIL_COND_V(p_out_pipeline->srvTextureCount + p_out_pipeline->srvBufferCount > FFX_MAX_NUM_SRVS, FFX_ERROR_OUT_OF_RANGE);
+ memcpy(p_out_pipeline->srvTextureBindings, effect_pass.sampled_texture_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->srvTextureCount);
+
+ // FSR doesn't use any buffers
+ p_out_pipeline->uavBufferCount = 0;
+ p_out_pipeline->uavTextureCount = effect_pass.storage_texture_bindings.size();
+ ERR_FAIL_COND_V(p_out_pipeline->uavTextureCount + p_out_pipeline->uavBufferCount > FFX_MAX_NUM_UAVS, FFX_ERROR_OUT_OF_RANGE);
+ memcpy(p_out_pipeline->uavTextureBindings, effect_pass.storage_texture_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->uavTextureCount);
+
+ p_out_pipeline->constCount = effect_pass.uniform_bindings.size();
+ ERR_FAIL_COND_V(p_out_pipeline->constCount > FFX_MAX_NUM_CONST_BUFFERS, FFX_ERROR_OUT_OF_RANGE);
+ memcpy(p_out_pipeline->constantBufferBindings, effect_pass.uniform_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->constCount);
+
+ // The two blocks below patch the copy in p_out_pipeline only; the pass table itself is untouched.
+ if (p_effect == FFX_EFFECT_FSR2) {
+ // Low resolution motion vectors are in use when the display resolution flag is NOT set.
+ bool low_resolution_mvs = (p_pipeline_description->contextFlags & FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) == 0;
+
+ if (p_pass == FFX_FSR2_PASS_ACCUMULATE || p_pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN) {
+ // Change the binding for motion vectors in this particular pass if low resolution MVs are used.
+ if (low_resolution_mvs) {
+ // NOTE(review): assumes the motion vectors binding sits at index 2 of the accumulate pass - confirm against the pass setup.
+ FfxResourceBinding &binding = p_out_pipeline->srvTextureBindings[2];
+ wcscpy_s(binding.name, L"r_dilated_motion_vectors");
+ }
+ }
+ }
+
+ // Same adjustment as above, using the FSR3 upscaler flag and pass ids.
+ if (p_effect == FFX_EFFECT_FSR3UPSCALER) {
+ bool low_resolution_mvs = (p_pipeline_description->contextFlags & FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) == 0;
+
+ if (p_pass == FFX_FSR3UPSCALER_PASS_ACCUMULATE || p_pass == FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN) {
+ // Change the binding for motion vectors in this particular pass if low resolution MVs are used.
+ if (low_resolution_mvs) {
+ FfxResourceBinding &binding = p_out_pipeline->srvTextureBindings[2];
+ wcscpy_s(binding.name, L"r_dilated_motion_vectors");
+ }
+ }
+ }
+
+ return FFX_OK;
+}
+
+// FFX backend callback for pipeline destruction. Deliberately does nothing and reports success.
+static FfxErrorCode destroy_pipeline_rd(FfxInterface *p_backend_interface, FfxPipelineState *p_pipeline, FfxUInt32 p_effect_context_id) {
+	// FFX asks for its pipelines to be destroyed every time a context is destroyed.
+	// We don't want to destroy them at that point, so the request is ignored.
+	return FFX_OK;
+}
+
+// FFX backend callback (installed as fpScheduleGpuJob): appends a copy of *p_job to the scratch's
+// job queue. Nothing is executed here; the queue is consumed by execute_gpu_jobs_rd().
+// Returns FFX_ERROR_INVALID_ARGUMENT if either pointer is null, FFX_OK otherwise.
+static FfxErrorCode schedule_gpu_job_rd(FfxInterface *p_backend_interface, const FfxGpuJobDescription *p_job) {
+ ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT);
+ ERR_FAIL_NULL_V(p_job, FFX_ERROR_INVALID_ARGUMENT);
+
+ FFXCommon::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
+ scratch.gpu_jobs.push_back(*p_job);
+
+ return FFX_OK;
+}
+
+// Runs a queued clear job: fills the job's target texture with the job's four-component color.
+// Fails with FFX_ERROR_INVALID_ARGUMENT when the target is described as a buffer.
+// p_effect_context_id is not used.
+static FfxErrorCode execute_gpu_job_clear_float_rd(FFXCommon::Scratch &p_scratch, const FfxClearFloatJobDescription &p_job, FfxUInt32 p_effect_context_id) {
+	RID target = p_scratch.resources.rids[p_job.target.internalIndex];
+	FfxResourceDescription &desc = p_scratch.resources.descriptions[p_job.target.internalIndex];
+
+	// Only textures can be cleared through this path.
+	ERR_FAIL_COND_V_MSG(desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT, "Cannot clear a buffer resource.");
+
+	// Clear every mip level (starting at mip 0) of the first layer.
+	RD *rd = RD::get_singleton();
+	rd->texture_clear(target, Color(p_job.color[0], p_job.color[1], p_job.color[2], p_job.color[3]), 0, desc.mipCount, 0, 1);
+
+	return FFX_OK;
+}
+
+// Runs a queued copy job: copies every mip level of the source texture into the same mip level
+// of the destination texture, origin to origin, on layer 0 of both textures.
+// Returns FFX_ERROR_INVALID_ARGUMENT if either resource is described as a buffer.
+// p_effect_context_id is not used.
+static FfxErrorCode execute_gpu_job_copy_rd(FFXCommon::Scratch &p_scratch, const FfxCopyJobDescription &p_job, FfxUInt32 p_effect_context_id) {
+	RID src = p_scratch.resources.rids[p_job.src.internalIndex];
+	RID dst = p_scratch.resources.rids[p_job.dst.internalIndex];
+	FfxResourceDescription &src_desc = p_scratch.resources.descriptions[p_job.src.internalIndex];
+	FfxResourceDescription &dst_desc = p_scratch.resources.descriptions[p_job.dst.internalIndex];
+
+	ERR_FAIL_COND_V(src_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);
+	ERR_FAIL_COND_V(dst_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);
+
+	for (uint32_t mip_level = 0; mip_level < src_desc.mipCount; mip_level++) {
+		// Each mip level is half the size of the previous one (never smaller than one texel),
+		// so the copy extent has to shrink with the level. Using the size of mip 0 for every
+		// level asks for a region larger than the mip being copied.
+		const uint32_t mip_width = MAX(src_desc.width >> mip_level, 1u);
+		const uint32_t mip_height = MAX(src_desc.height >> mip_level, 1u);
+		const uint32_t mip_depth = MAX(src_desc.depth >> mip_level, 1u);
+
+		RD::get_singleton()->texture_copy(src, dst, Vector3(0, 0, 0), Vector3(0, 0, 0), Vector3(mip_width, mip_height, mip_depth), mip_level, mip_level, 0, 0);
+	}
+
+	return FFX_OK;
+}
+
+// Runs a queued compute job: gathers the job's sampled textures, storage images, constant buffers
+// and the effect's samplers into uniform set 0, then records one compute dispatch.
+// Returns FFX_ERROR_BACKEND_API_ERROR when the uniform set cache is missing, when the job declares
+// SRV/UAV buffers, or when a mip slice cannot be created; FFX_ERROR_INVALID_ARGUMENT when the
+// pipeline carries null RIDs or a UAV mip index is out of range.
+// p_effect_context_id is not used.
+static FfxErrorCode execute_gpu_job_compute_rd(FFXCommon::Scratch &p_scratch, const FfxComputeJobDescription &p_job, FfxUInt32 p_effect_context_id) {
+ UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
+ ERR_FAIL_NULL_V(uniform_set_cache, FFX_ERROR_BACKEND_API_ERROR);
+
+ // create_pipeline_rd() stored pointers to our own RootSignature/Pipeline structs in these fields.
+ FFXCommon::RootSignature &root_signature = *reinterpret_cast(p_job.pipeline.rootSignature);
+ ERR_FAIL_COND_V(root_signature.shader_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);
+
+ FFXCommon::Pipeline &backend_pipeline = *reinterpret_cast(p_job.pipeline.pipeline);
+ ERR_FAIL_COND_V(backend_pipeline.pipeline_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);
+
+ // The vector is thread_local and only cleared, so it is reused from one call to the next.
+ thread_local LocalVector compute_uniforms;
+ compute_uniforms.clear();
+
+ // Sampled (read-only) textures.
+ for (uint32_t i = 0; i < p_job.pipeline.srvTextureCount; i++) {
+ RID texture_rid = p_scratch.resources.rids[p_job.srvTextures[i].resource.internalIndex];
+ RD::Uniform texture_uniform(RD::UNIFORM_TYPE_TEXTURE, p_job.pipeline.srvTextureBindings[i].slotIndex, texture_rid);
+ compute_uniforms.push_back(texture_uniform);
+ }
+
+ ERR_FAIL_COND_V_MSG(p_job.pipeline.srvBufferCount > 0, FFX_ERROR_BACKEND_API_ERROR, "Since FSR doesn't use buffers, SRV buffers are not supported.");
+
+ // Storage (read/write) images.
+ for (uint32_t i = 0; i < p_job.pipeline.uavTextureCount; i++) {
+ RID image_rid = p_scratch.resources.rids[p_job.uavTextures[i].resource.internalIndex];
+ RD::Uniform storage_uniform;
+ storage_uniform.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+ storage_uniform.binding = p_job.pipeline.uavTextureBindings[i].slotIndex;
+
+ int mipCount = p_scratch.resources.descriptions[p_job.uavTextures[i].resource.internalIndex].mipCount;
+ if (mipCount > 1) {
+ // Textures with several mips are bound through a shared texture that views only the
+ // requested mip. The slices are created on demand and cached per resource.
+ LocalVector &mip_slice_rids = p_scratch.resources.mip_slice_rids[p_job.uavTextures[i].resource.internalIndex];
+ if (mip_slice_rids.is_empty()) {
+ mip_slice_rids.resize(mipCount);
+ }
+
+ ERR_FAIL_COND_V(p_job.uavTextures[i].mip >= mip_slice_rids.size(), FFX_ERROR_INVALID_ARGUMENT);
+
+ if (mip_slice_rids[p_job.uavTextures[i].mip].is_null()) {
+ mip_slice_rids[p_job.uavTextures[i].mip] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), image_rid, 0, p_job.uavTextures[i].mip);
+ }
+
+ ERR_FAIL_COND_V(mip_slice_rids[p_job.uavTextures[i].mip].is_null(), FFX_ERROR_BACKEND_API_ERROR);
+
+ storage_uniform.append_id(mip_slice_rids[p_job.uavTextures[i].mip]);
+ } else {
+ // Single-mip textures are bound directly.
+ storage_uniform.append_id(image_rid);
+ }
+
+ compute_uniforms.push_back(storage_uniform);
+ }
+
+ ERR_FAIL_COND_V_MSG(p_job.pipeline.uavBufferCount > 0, FFX_ERROR_BACKEND_API_ERROR, "Since FSR doesn't use buffers, UAV buffers are not supported.");
+
+ // Constant buffers: each one takes the next buffer of the ring (wrapping at
+ // FFX_UBO_RING_BUFFER_SIZE) and the job's constant data is uploaded into it.
+ for (uint32_t i = 0; i < p_job.pipeline.constCount; i++) {
+ RID buffer_rid = p_scratch.ubo_ring_buffer[p_scratch.ubo_ring_buffer_index];
+ p_scratch.ubo_ring_buffer_index = (p_scratch.ubo_ring_buffer_index + 1) % FFX_UBO_RING_BUFFER_SIZE;
+
+ RD::get_singleton()->buffer_update(buffer_rid, 0, p_job.cbs[i].num32BitEntries * sizeof(uint32_t), p_job.cbs[i].data);
+
+ RD::Uniform buffer_uniform(RD::UNIFORM_TYPE_UNIFORM_BUFFER, p_job.pipeline.constantBufferBindings[i].slotIndex, buffer_rid);
+ compute_uniforms.push_back(buffer_uniform);
+ }
+
+ FFXCommon::Device &device = *reinterpret_cast(p_scratch.device);
+
+ // Samplers are not part of the FFX pipeline description; they are appended at fixed bindings:
+ // FSR1 gets the linear sampler at 100, FSR2/FSR3 get point at 100 and linear at 101.
+ if (p_scratch.effect_type == FFX_EFFECT_FSR1) {
+ RD::Uniform u_linear_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 100, device.linear_clamp_sampler);
+ compute_uniforms.push_back(u_linear_clamp_sampler);
+ } else if (p_scratch.effect_type == FFX_EFFECT_FSR2 || p_scratch.effect_type == FFX_EFFECT_FSR3UPSCALER) {
+ RD::Uniform u_point_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 100, device.point_clamp_sampler);
+ RD::Uniform u_linear_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 101, device.linear_clamp_sampler);
+ compute_uniforms.push_back(u_point_clamp_sampler);
+ compute_uniforms.push_back(u_linear_clamp_sampler);
+ }
+
+ // Record the dispatch: bind the pipeline, bind the (cached) uniform set as set 0, dispatch.
+ RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
+ RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, backend_pipeline.pipeline_rid);
+ RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache_vec(root_signature.shader_rid, 0, compute_uniforms), 0);
+ RD::get_singleton()->compute_list_dispatch(compute_list, p_job.dimensions[0], p_job.dimensions[1], p_job.dimensions[2]);
+ RD::get_singleton()->compute_list_end();
+
+ return FFX_OK;
+}
+
+// FFX backend callback (installed as fpExecuteGpuJobs): runs every job queued in the scratch, in
+// queue order, dispatching on the job type.
+// Execution stops at the first job that fails and its error code is returned; the queue is cleared
+// both on failure and on success. Unknown job types fail with FFX_ERROR_INVALID_ARGUMENT.
+// p_command_list is not used.
+static FfxErrorCode execute_gpu_jobs_rd(FfxInterface *p_backend_interface, FfxCommandList p_command_list, FfxUInt32 p_effect_context_id) {
+ ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT);
+
+ FFXCommon::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
+ FfxErrorCode error_code = FFX_OK;
+ for (const FfxGpuJobDescription &job : scratch.gpu_jobs) {
+ switch (job.jobType) {
+ case FFX_GPU_JOB_CLEAR_FLOAT: {
+ error_code = execute_gpu_job_clear_float_rd(scratch, job.clearJobDescriptor, p_effect_context_id);
+ } break;
+ case FFX_GPU_JOB_COPY: {
+ error_code = execute_gpu_job_copy_rd(scratch, job.copyJobDescriptor, p_effect_context_id);
+ } break;
+ case FFX_GPU_JOB_COMPUTE: {
+ error_code = execute_gpu_job_compute_rd(scratch, job.computeJobDescriptor, p_effect_context_id);
+ } break;
+ case FFX_GPU_JOB_DISCARD: {
+ // Discard is a DX12-only concept, so nothing has to be done.
+ // The DX12 backend handles this automatically.
+ } break;
+ default: {
+ error_code = FFX_ERROR_INVALID_ARGUMENT;
+ } break;
+ }
+
+ if (error_code != FFX_OK) {
+ // Drop the remaining jobs so they are not replayed by the next call.
+ scratch.gpu_jobs.clear();
+#ifdef DEV_ENABLED
+ // The failure is only reported in DEV_ENABLED builds.
+ ERR_PRINT(vformat("FFX GPU job failed with code %d", error_code));
+#endif
+ return error_code;
+ }
+ }
+
+ scratch.gpu_jobs.clear();
+
+ return FFX_OK;
+}
+
+// FFX backend callback (installed as fpStageConstantBufferDataFunc): copies p_size bytes of constant
+// data into the scratch's staging buffer and points p_constant_buffer at the copy.
+// Every allocation advances the write cursor by the size rounded up to 256 bytes. When the next
+// allocation would reach FFX_STAGING_CONSTANT_BUFFER_SIZE, the cursor wraps back to offset 0.
+// Returns FFX_ERROR_INVALID_POINTER for a null argument or a missing staging buffer, and
+// FFX_ERROR_OUT_OF_RANGE when the data is larger than the staging buffer.
+static FfxErrorCode stage_constant_buffer_data_rd(FfxInterface *p_backend_interface, void *p_data, FfxUInt32 p_size, FfxConstantBuffer *p_constant_buffer) {
+	ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_POINTER);
+	ERR_FAIL_NULL_V(p_data, FFX_ERROR_INVALID_POINTER);
+	ERR_FAIL_NULL_V(p_constant_buffer, FFX_ERROR_INVALID_POINTER);
+
+	FFXCommon::Scratch &scratch = *reinterpret_cast<FFXCommon::Scratch *>(p_backend_interface->scratchBuffer);
+	ERR_FAIL_NULL_V(scratch.staging_constant_buffer, FFX_ERROR_INVALID_POINTER);
+
+	const size_t aligned_size = FFX_ALIGN_UP(p_size, 256);
+	// Wrapping the cursor cannot help a payload that is bigger than the whole buffer: without
+	// this check it would be written at offset 0 and run past the end of the staging memory.
+	// NOTE(review): assumes the staging buffer is allocated with FFX_STAGING_CONSTANT_BUFFER_SIZE bytes - confirm at the allocation site.
+	ERR_FAIL_COND_V(aligned_size > FFX_STAGING_CONSTANT_BUFFER_SIZE, FFX_ERROR_OUT_OF_RANGE);
+
+	if (scratch.staging_constant_buffer_base + aligned_size >= FFX_STAGING_CONSTANT_BUFFER_SIZE) {
+		scratch.staging_constant_buffer_base = 0;
+	}
+
+	void *dst = scratch.staging_constant_buffer + scratch.staging_constant_buffer_base;
+	memcpy(dst, p_data, p_size);
+
+	p_constant_buffer->data = (uint32_t *)dst;
+	p_constant_buffer->num32BitEntries = p_size / sizeof(uint32_t);
+	scratch.staging_constant_buffer_base += aligned_size;
+
+	return FFX_OK;
+}
+
+// Wraps an RD texture in an FfxResource so it can be handed to an FFX dispatch.
+// p_rid: pointer to the texture RID; a null RID yields a zero-initialized FfxResource.
+// p_name: wide-string name copied into the resource.
+// The description (type, format, size, mip count) is read from the texture's RD format.
+// NOTE(review): p_rid itself is dereferenced without a null-pointer check - callers presumably always pass a valid address; confirm.
+FfxResource FFXCommon::get_resource_rd(RID *p_rid, const wchar_t *p_name) {
+ FfxResource res = {};
+ if (p_rid->is_null()) {
+ return res;
+ }
+
+ wcscpy_s(res.name, p_name);
+
+ RD::TextureFormat texture_format = RD::get_singleton()->texture_get_format(*p_rid);
+ res.description.type = rd_texture_type_to_ffx_resource_type(texture_format.texture_type);
+ res.description.format = rd_format_to_ffx_surface_format(texture_format.format);
+ res.description.width = texture_format.width;
+ res.description.height = texture_format.height;
+ res.description.depth = texture_format.depth;
+ res.description.mipCount = texture_format.mipmaps;
+ res.description.flags = FFX_RESOURCE_FLAGS_NONE;
+ // The resource stores the address of the RID, not a copy of it.
+ // NOTE(review): the pointed-to RID presumably has to outlive the use of this FfxResource - confirm where it is consumed.
+ res.resource = reinterpret_cast(p_rid);
+
+ return res;
+}
+
+// Builds the capability report handed to the FFX SDK. Every value is fixed except FP16
+// support, which is queried from the rendering device.
+FfxDeviceCapabilities FFXCommon::get_device_capabilities() {
+	const bool half_float_supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_HALF_FLOAT);
+
+	FfxDeviceCapabilities caps = {};
+	caps.raytracingSupported = false;
+	caps.fp16Supported = half_float_supported;
+	// Minimum and maximum wave size are both reported as 32 lanes.
+	caps.waveLaneCountMax = 32;
+	caps.waveLaneCountMin = 32;
+	caps.maximumSupportedShaderModel = FFX_SHADER_MODEL_6_7;
+	return caps;
+}
+
+// Fills p_interface with the RenderingDevice-backed callbacks defined in this file and attaches
+// the scratch area and device that those callbacks read back from the interface.
+void FFXCommon::create_ffx_interface(FfxInterface *p_interface, Scratch *p_scratch, Device *p_device) {
+	FfxInterface &iface = *p_interface;
+
+	// State shared with the callbacks.
+	iface.device = p_device;
+	iface.scratchBuffer = p_scratch;
+	iface.scratchBufferSize = sizeof(Scratch);
+
+	// Backend context and capabilities.
+	iface.fpGetSDKVersion = get_sdk_version_rd;
+	iface.fpGetDeviceCapabilities = get_device_capabilities_rd;
+	iface.fpCreateBackendContext = create_backend_context_rd;
+	iface.fpDestroyBackendContext = destroy_backend_context_rd;
+
+	// Resources.
+	iface.fpCreateResource = create_resource_rd;
+	iface.fpDestroyResource = destroy_resource_rd;
+	iface.fpRegisterResource = register_resource_rd;
+	iface.fpUnregisterResources = unregister_resources_rd;
+	iface.fpGetResourceDescription = get_resource_description_rd;
+
+	// Pipelines.
+	iface.fpCreatePipeline = create_pipeline_rd;
+	iface.fpDestroyPipeline = destroy_pipeline_rd;
+
+	// Job submission and constant data staging.
+	iface.fpScheduleGpuJob = schedule_gpu_job_rd;
+	iface.fpExecuteGpuJobs = execute_gpu_jobs_rd;
+	iface.fpStageConstantBufferDataFunc = stage_constant_buffer_data_rd;
+}
+
+// Creates a sampler that uses `filter` for both minification and magnification and clamps to
+// the edge on all three axes. Returns a null RID (after reporting an error) if creation fails.
+RID FFXCommon::create_clamp_sampler(RD::SamplerFilter filter) {
+	RD::SamplerState state;
+
+	// Filtering.
+	state.min_filter = filter;
+	state.mag_filter = filter;
+	state.anisotropy_max = 1.0;
+
+	// Addressing: clamp to edge on U, V and W.
+	state.repeat_u = state.repeat_v = state.repeat_w = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
+
+	// LOD range.
+	state.max_lod = 1000.0f;
+	state.min_lod = -1000.0f;
+
+	RID sampler = RD::get_singleton()->sampler_create(state);
+	ERR_FAIL_COND_V(sampler.is_null(), RID());
+	return sampler;
+}
diff --git a/servers/rendering/renderer_rd/effects/fsr2.h b/servers/rendering/renderer_rd/effects/ffx/ffx_common.h
similarity index 63%
rename from servers/rendering/renderer_rd/effects/fsr2.h
rename to servers/rendering/renderer_rd/effects/ffx/ffx_common.h
index 6554588df5cb..05aee97fd81b 100644
--- a/servers/rendering/renderer_rd/effects/fsr2.h
+++ b/servers/rendering/renderer_rd/effects/ffx/ffx_common.h
@@ -1,5 +1,5 @@
/**************************************************************************/
-/* fsr2.h */
+/* ffx_common.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
@@ -30,14 +30,9 @@
#pragma once
-#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl.gen.h"
-#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl.gen.h"
-#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl.gen.h"
-#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl.gen.h"
-#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl.gen.h"
-#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl.gen.h"
-#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl.gen.h"
-#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/pipeline_deferred_rd.h"
+#include "servers/rendering/renderer_rd/shader_rd.h"
+#include "servers/rendering/rendering_server.h"
// This flag doesn't actually control anything GCC specific in FSR2. It determines
// if symbols should be exported, which is not required for Godot.
@@ -45,15 +40,18 @@
#define FFX_GCC
#endif
-#include "thirdparty/amd-fsr2/ffx_fsr2.h"
+#include "thirdparty/amd-ffx/ffx_interface.h"
-#define FSR2_MAX_QUEUED_FRAMES (4)
-#define FSR2_MAX_UNIFORM_BUFFERS (4)
-#define FSR2_MAX_BUFFERED_DESCRIPTORS (FFX_FSR2_PASS_COUNT * FSR2_MAX_QUEUED_FRAMES)
-#define FSR2_UBO_RING_BUFFER_SIZE (FSR2_MAX_BUFFERED_DESCRIPTORS * FSR2_MAX_UNIFORM_BUFFERS)
+#define FFX_UBO_RING_BUFFER_SIZE (FFX_MAX_QUEUED_FRAMES * FFX_MAX_NUM_CONST_BUFFERS * FFX_MAX_PASS_COUNT)
+// The FFX SDK defines a very large staging buffer size, but we don't need that much,
+// so we redefine it based on the maximum size of the uniform buffers
+// that the FSR effects supported by Godot actually use.
+#define FFX_STAGING_CONSTANT_BUFFER_SIZE (FFX_MAX_QUEUED_FRAMES * 2048)
namespace RendererRD {
-class FSR2Context {
+
+// Helper class for Fidelity FX interop.
+class FFXCommon {
public:
enum ResourceID : uint32_t {
RESOURCE_ID_DYNAMIC = 0xFFFFFFFF
@@ -109,23 +107,6 @@ class FSR2Context {
}
};
- struct Scratch {
- Resources resources;
- LocalVector gpu_jobs;
- RID ubo_ring_buffer[FSR2_UBO_RING_BUFFER_SIZE];
- uint32_t ubo_ring_buffer_index = 0;
- FfxDevice device = nullptr;
- };
-
- Scratch scratch;
- FfxFsr2Context fsr_context;
- FfxFsr2ContextDescription fsr_desc;
-
- ~FSR2Context();
-};
-
-class FSR2Effect {
-public:
struct RootSignature {
// Proxy structure to store the shader required by RD that uses the terminology used by the FSR2 API.
RID shader_rid;
@@ -141,56 +122,40 @@ class FSR2Effect {
RootSignature root_signature;
uint32_t shader_variant = 0;
Pipeline pipeline;
- Vector sampled_bindings;
- Vector storage_bindings;
+ Vector sampled_texture_bindings;
+ Vector storage_texture_bindings;
Vector uniform_bindings;
};
struct Device {
- Pass passes[FFX_FSR2_PASS_COUNT];
- FfxDeviceCapabilities capabilities;
RID point_clamp_sampler;
RID linear_clamp_sampler;
- };
+ Pass passes[FFX_MAX_PASS_COUNT];
+ } device;
- struct Parameters {
- FSR2Context *context;
- Size2i internal_size;
- RID color;
- RID depth;
- RID velocity;
- RID reactive;
- RID exposure;
- RID output;
- float z_near = 0.0f;
- float z_far = 0.0f;
- float fovy = 0.0f;
- Vector2 jitter;
- float delta_time = 0.0f;
- float sharpness = 0.0f;
- bool reset_accumulation = false;
- Projection reprojection;
+ struct Scratch {
+ FfxEffect effect_type;
+ Resources resources;
+ LocalVector gpu_jobs;
+ // Uniform ring buffer
+ RID ubo_ring_buffer[FFX_UBO_RING_BUFFER_SIZE];
+ uint32_t ubo_ring_buffer_index = 0;
+ // Staging buffer for constant buffer data.
+ uint8_t *staging_constant_buffer;
+ size_t staging_constant_buffer_base = 0;
+ // Pointer to the device common to all contexts.
+ // Static functions cannot access class members, so we store it here.
+ FfxDevice device;
};
- FSR2Effect();
- ~FSR2Effect();
- FSR2Context *create_context(Size2i p_internal_size, Size2i p_target_size);
- void upscale(const Parameters &p_params);
-
-private:
- struct {
- Fsr2DepthClipPassShaderRD depth_clip;
- Fsr2ReconstructPreviousDepthPassShaderRD reconstruct_previous_depth;
- Fsr2LockPassShaderRD lock;
- Fsr2AccumulatePassShaderRD accumulate;
- Fsr2AccumulatePassShaderRD accumulate_sharpen;
- Fsr2RcasPassShaderRD rcas;
- Fsr2ComputeLuminancePyramidPassShaderRD compute_luminance_pyramid;
- Fsr2AutogenReactivePassShaderRD autogen_reactive;
- Fsr2TcrAutogenPassShaderRD tcr_autogen;
- } shaders;
-
- Device device;
-};
+ static FfxDeviceCapabilities get_device_capabilities();
+ static void create_ffx_interface(FfxInterface *p_interface, Scratch *p_scratch, Device *p_device);
+ static RID create_clamp_sampler(RD::SamplerFilter filter);
-} // namespace RendererRD
+ static FfxResource get_resource_rd(RID *p_rid, const wchar_t *p_name);
+ static RD::TextureType ffx_resource_type_to_rd_texture_type(FfxResourceType p_type);
+ static FfxResourceType rd_texture_type_to_ffx_resource_type(RD::TextureType p_type);
+ static RD::DataFormat ffx_surface_format_to_rd_format(FfxSurfaceFormat p_format);
+ static FfxSurfaceFormat rd_format_to_ffx_surface_format(RD::DataFormat p_format);
+};
+} //namespace RendererRD
diff --git a/servers/rendering/renderer_rd/effects/ffx/fsr1.cpp b/servers/rendering/renderer_rd/effects/ffx/fsr1.cpp
new file mode 100644
index 000000000000..eb34961db8d2
--- /dev/null
+++ b/servers/rendering/renderer_rd/effects/ffx/fsr1.cpp
@@ -0,0 +1,164 @@
+/**************************************************************************/
+/* fsr1.cpp */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#include "fsr1.h"
+
+#include "servers/rendering/renderer_rd/storage_rd/material_storage.h"
+#include "servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h"
+
+using namespace RendererRD;
+
+// Tears down the FFX FSR1 context held by this object.
+FSR1Context::~FSR1Context() {
+	ffxFsr1ContextDestroy(&this->fsr_context);
+}
+
+// Delegates to the render buffers, passing this effect along so they can set up FSR1 state.
+// NOTE(review): presumably ensure_fsr1() creates the context through create_context() when needed - confirm in RenderSceneBuffersRD.
+void FSR1Effect::ensure_context(Ref p_render_buffers) {
+ p_render_buffers->ensure_fsr1(this);
+}
+
+// Sets up the three FSR1 passes (EASU, EASU+RCAS, RCAS) in the device pass table: initializes
+// their shaders, selects the FP16 variant when the device reports support for it, declares the
+// resource bindings of each pass, and creates the linear clamp sampler.
+FSR1Effect::FSR1Effect() {
+ FfxDeviceCapabilities capabilities = FFXCommon::get_device_capabilities();
+
+ String general_defines =
+ "\n#define FFX_GPU\n"
+ "\n#define FFX_GLSL 1\n";
+
+ // Variant 0: full precision, variant 1: half precision.
+ Vector modes_with_fp16;
+ modes_with_fp16.push_back("");
+ modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
+
+ // Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and
+ // there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL
+ // files included in FSR1 and mapping the macro bindings (#define FSR1_BIND_*) to their respective implementation names.
+ //
+ // It is not guaranteed these will remain consistent at all between versions of FSR1, so it'll be necessary to keep these
+ // bindings up to date whenever the library is updated. In such cases, it is very likely the validation layer will throw an
+ // error if the bindings do not match.
+
+ {
+ // EASU variants, by index: 0 = plain, 1 = RCAS, 2 = half precision, 3 = half precision + RCAS.
+ Vector easu_modes_with_fp16;
+ easu_modes_with_fp16.push_back("\n");
+ easu_modes_with_fp16.push_back("\n#define FFX_FSR1_OPTION_APPLY_RCAS 1\n");
+ easu_modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
+ easu_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR1_OPTION_APPLY_RCAS 1\n");
+
+ FFXCommon::Pass &pass = device.passes[FFX_FSR1_PASS_EASU];
+ pass.shader = &shaders.easu;
+ pass.shader->initialize(easu_modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ // Pick the non-RCAS variant of the matching precision (index 2 or 0 in the list above).
+ pass.shader_variant = capabilities.fp16Supported ? 2 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_color" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 1, 0, 0, L"rw_internal_upscaled_color" },
+ FfxResourceBinding{ 2, 0, 0, L"rw_upscaled_output" }
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 3, 0, 0, L"cbFSR1" }
+ };
+
+ // EASU RCAS pass is a clone of the EASU pass with the RCAS variant.
+ // The clone shares the same shader object and shader version; only the variant index
+ // differs (+1 selects the RCAS entry of the same precision).
+ FFXCommon::Pass &easu_rcas_pass = device.passes[FFX_FSR1_PASS_EASU_RCAS];
+ easu_rcas_pass = pass;
+ easu_rcas_pass.shader_variant = pass.shader_variant + 1;
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR1_PASS_RCAS];
+ pass.shader = &shaders.rcas;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_internal_upscaled_color" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 1, 0, 0, L"rw_upscaled_output" },
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 2, 0, 0, L"cbFSR1" }
+ };
+ }
+
+ // Only the linear sampler is created here; the point clamp sampler is left unset.
+ device.linear_clamp_sampler = FFXCommon::create_clamp_sampler(RD::SAMPLER_FILTER_LINEAR);
+}
+
+// Releases what the constructor created: the linear clamp sampler and the shader version of
+// every FSR1 pass.
+FSR1Effect::~FSR1Effect() {
+	RD::get_singleton()->free_rid(device.linear_clamp_sampler);
+
+	for (uint32_t pass_index = 0; pass_index < FFX_FSR1_PASS_COUNT; pass_index++) {
+		FFXCommon::Pass &pass = device.passes[pass_index];
+		pass.shader->version_free(pass.shader_version);
+	}
+}
+
+// Allocates an FSR1 context for the given internal (render) size, target (display) size and
+// output format, with HDR and RCAS enabled.
+// Returns the new context, or nullptr (after deleting the allocation) if FFX fails to create it.
+FSR1Context *FSR1Effect::create_context(Size2i p_internal_size, Size2i p_target_size, RD::DataFormat p_output_format) {
+	FSR1Context *context = memnew(RendererRD::FSR1Context);
+
+	FfxFsr1ContextDescription &desc = context->fsr_desc;
+	desc.flags = FFX_FSR1_ENABLE_HIGH_DYNAMIC_RANGE | FFX_FSR1_ENABLE_RCAS;
+	desc.maxRenderSize.width = p_internal_size.x;
+	desc.maxRenderSize.height = p_internal_size.y;
+	desc.displaySize.width = p_target_size.x;
+	desc.displaySize.height = p_target_size.y;
+	desc.outputFormat = FFXCommon::rd_format_to_ffx_surface_format(p_output_format);
+
+	// Hook the RD backend callbacks up to this context's scratch and to the effect's device.
+	FFXCommon::create_ffx_interface(&desc.backendInterface, &context->scratch, &device);
+
+	if (ffxFsr1ContextCreate(&context->fsr_context, &desc) != FFX_OK) {
+		memdelete(context);
+		return nullptr;
+	}
+
+	return context;
+}
+
+// Upscales p_source_rd_texture into p_destination_texture with the FSR1 context stored in the
+// render buffers. The render size is the buffers' internal size and the sharpness comes from
+// the buffers' FSR sharpness setting. Reports an error if the FFX dispatch does not return FFX_OK.
+void FSR1Effect::process(Ref p_render_buffers, RID p_source_rd_texture, RID p_destination_texture) {
+ // NOTE(review): the context is dereferenced below without a null check - presumably ensure_context() always runs first; confirm.
+ FSR1Context *fsr1_context = p_render_buffers->get_fsr1_context();
+
+ Size2i internal_size = p_render_buffers->get_internal_size();
+ float fsr_upscale_sharpness = p_render_buffers->get_fsr_sharpness();
+
+ FfxFsr1DispatchDescription dispatch_desc = {};
+ dispatch_desc.commandList = nullptr;
+ // The addresses of the by-value RID parameters are passed, so the wrapped resources refer to
+ // locals of this call.
+ dispatch_desc.color = FFXCommon::get_resource_rd(&p_source_rd_texture, L"color");
+ dispatch_desc.output = FFXCommon::get_resource_rd(&p_destination_texture, L"output");
+ dispatch_desc.renderSize.width = internal_size.width;
+ dispatch_desc.renderSize.height = internal_size.height;
+ // Sharpening is only enabled for a sharpness value above 1e-6.
+ dispatch_desc.enableSharpening = (fsr_upscale_sharpness > 1e-6f);
+ dispatch_desc.sharpness = fsr_upscale_sharpness;
+
+ FfxErrorCode result = ffxFsr1ContextDispatch(&fsr1_context->fsr_context, &dispatch_desc);
+ ERR_FAIL_COND(result != FFX_OK);
+}
diff --git a/servers/rendering/renderer_rd/effects/fsr.h b/servers/rendering/renderer_rd/effects/ffx/fsr1.h
similarity index 65%
rename from servers/rendering/renderer_rd/effects/fsr.h
rename to servers/rendering/renderer_rd/effects/ffx/fsr1.h
index 7f308be203d3..3e1fc32f00a9 100644
--- a/servers/rendering/renderer_rd/effects/fsr.h
+++ b/servers/rendering/renderer_rd/effects/ffx/fsr1.h
@@ -1,5 +1,5 @@
/**************************************************************************/
-/* fsr.h */
+/* fsr1.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
@@ -30,47 +30,42 @@
#pragma once
-#include "spatial_upscaler.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_easu_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_rcas_pass.glsl.gen.h"
-#include "../storage_rd/render_scene_buffers_rd.h"
-#include "servers/rendering/renderer_rd/pipeline_deferred_rd.h"
-#include "servers/rendering/renderer_rd/shaders/effects/fsr_upscale.glsl.gen.h"
+#include "ffx_common.h"
+#include "servers/rendering/renderer_rd/effects/spatial_upscaler.h"
+#include "servers/rendering/rendering_server.h"
-namespace RendererRD {
+#include "thirdparty/amd-ffx/ffx_fsr1.h"
-class FSR : public SpatialUpscaler {
+namespace RendererRD {
+class FSR1Context {
public:
- FSR();
- ~FSR();
+ FFXCommon::Scratch scratch;
+ FfxFsr1Context fsr_context;
+ FfxFsr1ContextDescription fsr_desc;
- virtual const Span get_label() const final { return "FSR 1.0 Upscale"; }
- virtual void ensure_context(Ref p_render_buffers) final {}
- virtual void process(Ref p_render_buffers, RID p_source_rd_texture, RID p_destination_texture) final;
+ ~FSR1Context();
+};
-private:
- enum FSRShaderVariant {
- FSR_SHADER_VARIANT_NORMAL,
- FSR_SHADER_VARIANT_FALLBACK,
- };
+class FSR1Effect : public SpatialUpscaler {
+public:
+ FSR1Effect();
+ ~FSR1Effect() override;
- enum FSRUpscalePass {
- FSR_UPSCALE_PASS_EASU = 0,
- FSR_UPSCALE_PASS_RCAS = 1
- };
+ const Span get_label() const final { return "FSR 1.2 Upscale"; }
+ void ensure_context(Ref p_render_buffers) final;
+ void process(Ref p_render_buffers, RID p_source_rd_texture, RID p_destination_texture) final;
- struct FSRUpscalePushConstant {
- float resolution_width;
- float resolution_height;
- float upscaled_width;
- float upscaled_height;
- float sharpness;
- int pass;
- int _unused0, _unused1;
- };
+ FSR1Context *create_context(Size2i p_internal_size, Size2i p_target_size, RD::DataFormat p_output_format);
- FsrUpscaleShaderRD fsr_shader;
- RID shader_version;
- PipelineDeferredRD pipeline;
-};
+private:
+ struct {
+ Fsr1EasuPassShaderRD easu;
+ Fsr1RcasPassShaderRD rcas;
+ } shaders;
-} // namespace RendererRD
+ FFXCommon::Device device;
+};
+} //namespace RendererRD
diff --git a/servers/rendering/renderer_rd/effects/ffx/fsr2.cpp b/servers/rendering/renderer_rd/effects/ffx/fsr2.cpp
new file mode 100644
index 000000000000..19390f4fea74
--- /dev/null
+++ b/servers/rendering/renderer_rd/effects/ffx/fsr2.cpp
@@ -0,0 +1,383 @@
+/**************************************************************************/
+/* fsr2.cpp */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#include "fsr2.h"
+
+#include "servers/rendering/renderer_rd/storage_rd/material_storage.h"
+#include "servers/rendering/renderer_rd/uniform_set_cache_rd.h"
+
+using namespace RendererRD;
+
+FSR2Context::~FSR2Context() { // Destroys the FidelityFX FSR2 context stored in fsr_context.
+ ffxFsr2ContextDestroy(&fsr_context); // Also reached when FSR2Effect::create_context() memdelete()s a context whose creation failed.
+}
+
+FSR2Effect::FSR2Effect() { // Initializes every FSR2 pass (shader, version, variant, binding tables) and the two clamp samplers.
+ FfxDeviceCapabilities capabilities = FFXCommon::get_device_capabilities();
+
+ String general_defines = // Defines handed to every pass shader's initialize() call below.
+ "\n#define FFX_GPU\n"
+ "\n#define FFX_GLSL 1\n"
+ "\n#define FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n"
+ "\n#define FFX_FSR2_OPTION_HDR_COLOR_INPUT 1\n"
+ "\n#define FFX_FSR2_OPTION_INVERTED_DEPTH 1\n"
+ "\n#define FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n"
+ "\n#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n";
+
+ Vector modes_single; // A single variant with no extra defines.
+ modes_single.push_back("");
+
+ Vector modes_with_fp16; // Variant 0: no extra defines; variant 1: FFX_HALF.
+ modes_with_fp16.push_back("");
+ modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
+
+ // Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and
+ // there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL
+ // files included in FSR2 and mapping the macro bindings (#define FSR2_BIND_*) to their respective implementation names.
+ //
+ // It is not guaranteed these will remain consistent at all between versions of FSR2, so it'll be necessary to keep these
+ // bindings up to date whenever the library is updated. In such cases, it is very likely the validation layer will throw an
+ // error if the bindings do not match.
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_DEPTH_CLIP];
+ pass.shader = &shaders.depth_clip;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0; // Index 1 is the FFX_HALF entry of modes_with_fp16.
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_reconstructed_previous_nearest_depth" },
+ FfxResourceBinding{ 1, 0, 0, L"r_dilated_motion_vectors" },
+ FfxResourceBinding{ 2, 0, 0, L"r_dilatedDepth" },
+ FfxResourceBinding{ 3, 0, 0, L"r_reactive_mask" },
+ FfxResourceBinding{ 4, 0, 0, L"r_transparency_and_composition_mask" },
+ // Godot render graph forces one resource to serve only one usage so we have to remove this binding
+ // FfxResourceBinding{ 5, 0, 0, L"r_prepared_input_color" },
+ FfxResourceBinding{ 6, 0, 0, L"r_previous_dilated_motion_vectors" },
+ FfxResourceBinding{ 7, 0, 0, L"r_input_motion_vectors" },
+ FfxResourceBinding{ 8, 0, 0, L"r_input_color_jittered" },
+ FfxResourceBinding{ 9, 0, 0, L"r_input_depth" },
+ FfxResourceBinding{ 10, 0, 0, L"r_input_exposure" }
+ };
+
+ pass.storage_texture_bindings = {
+ // FSR2_BIND_UAV_DEPTH_CLIP (11) does not point to anything.
+ FfxResourceBinding{ 12, 0, 0, L"rw_dilated_reactive_masks" },
+ FfxResourceBinding{ 13, 0, 0, L"rw_prepared_input_color" }
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 14, 0, 0, L"cbFSR2" }
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH];
+ pass.shader = &shaders.reconstruct_previous_depth;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_motion_vectors" },
+ FfxResourceBinding{ 1, 0, 0, L"r_input_depth" },
+ FfxResourceBinding{ 2, 0, 0, L"r_input_color_jittered" },
+ FfxResourceBinding{ 3, 0, 0, L"r_input_exposure" },
+ FfxResourceBinding{ 4, 0, 0, L"r_luma_history" }
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 5, 0, 0, L"rw_reconstructed_previous_nearest_depth" },
+ FfxResourceBinding{ 6, 0, 0, L"rw_dilated_motion_vectors" },
+ FfxResourceBinding{ 7, 0, 0, L"rw_dilatedDepth" },
+ FfxResourceBinding{ 8, 0, 0, L"rw_prepared_input_color" },
+ FfxResourceBinding{ 9, 0, 0, L"rw_luma_history" },
+ // FSR2_BIND_UAV_LUMA_INSTABILITY (10) does not point to anything.
+ FfxResourceBinding{ 11, 0, 0, L"rw_lock_input_luma" }
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 12, 0, 0, L"cbFSR2" }
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_LOCK];
+ pass.shader = &shaders.lock;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_lock_input_luma" }
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 1, 0, 0, L"rw_new_locks" },
+ FfxResourceBinding{ 2, 0, 0, L"rw_reconstructed_previous_nearest_depth" }
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 3, 0, 0, L"cbFSR2" }
+ };
+ }
+
+ {
+ Vector accumulate_modes_with_fp16; // Variants: 0 = plain, 1 = sharpening, 2 = FFX_HALF, 3 = FFX_HALF + sharpening.
+ accumulate_modes_with_fp16.push_back("\n");
+ accumulate_modes_with_fp16.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
+ accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
+ accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
+
+ // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
+ const bool fp16_path_supported = RD::get_singleton()->get_device_vendor_name() != "NVIDIA";
+ FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_ACCUMULATE];
+ pass.shader = &shaders.accumulate;
+ pass.shader->initialize(accumulate_modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported && fp16_path_supported ? 2 : 0; // Non-sharpening variant; 2 is the FFX_HALF one.
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_exposure" },
+ FfxResourceBinding{ 1, 0, 0, L"r_dilated_reactive_masks" },
+ FfxResourceBinding{ 2, 0, 0, L"r_input_motion_vectors" },
+ FfxResourceBinding{ 3, 0, 0, L"r_internal_upscaled_color" },
+ FfxResourceBinding{ 4, 0, 0, L"r_lock_status" },
+ FfxResourceBinding{ 5, 0, 0, L"r_input_depth" },
+ FfxResourceBinding{ 6, 0, 0, L"r_prepared_input_color" },
+ // FSR2_BIND_SRV_LUMA_INSTABILITY(7) does not point to anything.
+ FfxResourceBinding{ 8, 0, 0, L"r_lanczos_lut" },
+ FfxResourceBinding{ 9, 0, 0, L"r_upsample_maximum_bias_lut" },
+ FfxResourceBinding{ 10, 0, 0, L"r_imgMips" },
+ FfxResourceBinding{ 11, 0, 0, L"r_auto_exposure" },
+ FfxResourceBinding{ 12, 0, 0, L"r_luma_history" }
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 13, 0, 0, L"rw_internal_upscaled_color" },
+ FfxResourceBinding{ 14, 0, 0, L"rw_lock_status" },
+ FfxResourceBinding{ 15, 0, 0, L"rw_upscaled_output" },
+ FfxResourceBinding{ 16, 0, 0, L"rw_new_locks" },
+ FfxResourceBinding{ 17, 0, 0, L"rw_luma_history" }
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 18, 0, 0, L"cbFSR2" }
+ };
+
+ // Sharpen pass is a clone of the accumulate pass with the sharpening variant.
+ FFXCommon::Pass &sharpen_pass = device.passes[FFX_FSR2_PASS_ACCUMULATE_SHARPEN];
+ sharpen_pass = pass; // The copy carries the same shader pointer and shader_version as the accumulate pass.
+ sharpen_pass.shader_variant = pass.shader_variant + 1; // +1 is the sharpening build of the precision chosen above.
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_RCAS];
+ pass.shader = &shaders.rcas;
+ pass.shader->initialize(modes_single, general_defines);
+ pass.shader_version = pass.shader->version_create();
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_exposure" },
+ FfxResourceBinding{ 1, 0, 0, L"r_rcas_input" }
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 2, 0, 0, L"rw_upscaled_output" }
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 3, 0, 0, L"cbFSR2" },
+ FfxResourceBinding{ 4, 0, 0, L"cbRCAS" }
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID];
+ pass.shader = &shaders.compute_luminance_pyramid;
+ pass.shader->initialize(modes_single, general_defines);
+ pass.shader_version = pass.shader->version_create();
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_color_jittered" }
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 1, 0, 0, L"rw_spd_global_atomic" },
+ FfxResourceBinding{ 2, 0, 0, L"rw_img_mip_shading_change" },
+ FfxResourceBinding{ 3, 0, 0, L"rw_img_mip_5" },
+ FfxResourceBinding{ 4, 0, 0, L"rw_auto_exposure" }
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 5, 0, 0, L"cbFSR2" },
+ FfxResourceBinding{ 6, 0, 0, L"cbSPD" }
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_GENERATE_REACTIVE];
+ pass.shader = &shaders.autogen_reactive;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_opaque_only" },
+ FfxResourceBinding{ 1, 0, 0, L"r_input_color_jittered" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 2, 0, 0, L"rw_output_autoreactive" }
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 3, 0, 0, L"cbGenerateReactive" },
+ FfxResourceBinding{ 4, 0, 0, L"cbFSR2" }
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_TCR_AUTOGENERATE];
+ pass.shader = &shaders.tcr_autogen;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_opaque_only" },
+ FfxResourceBinding{ 1, 0, 0, L"r_input_color_jittered" },
+ FfxResourceBinding{ 2, 0, 0, L"r_input_motion_vectors" },
+ FfxResourceBinding{ 3, 0, 0, L"r_input_prev_color_pre_alpha" },
+ FfxResourceBinding{ 4, 0, 0, L"r_input_prev_color_post_alpha" },
+ FfxResourceBinding{ 5, 0, 0, L"r_reactive_mask" },
+ FfxResourceBinding{ 6, 0, 0, L"r_transparency_and_composition_mask" },
+ FfxResourceBinding{ 13, 0, 0, L"r_input_depth" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 7, 0, 0, L"rw_output_autoreactive" },
+ FfxResourceBinding{ 8, 0, 0, L"rw_output_autocomposition" },
+ FfxResourceBinding{ 9, 0, 0, L"rw_output_prev_color_pre_alpha" },
+ FfxResourceBinding{ 10, 0, 0, L"rw_output_prev_color_post_alpha" }
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 11, 0, 0, L"cbFSR2" },
+ FfxResourceBinding{ 12, 0, 0, L"cbGenerateReactive" }
+ };
+ }
+
+ device.linear_clamp_sampler = FFXCommon::create_clamp_sampler(RD::SAMPLER_FILTER_LINEAR); // Both samplers are freed in ~FSR2Effect().
+ device.point_clamp_sampler = FFXCommon::create_clamp_sampler(RD::SAMPLER_FILTER_NEAREST);
+}
+
+FSR2Effect::~FSR2Effect() {
+ RD *rd = RD::get_singleton(); // Samplers are released first, then the shader version of every FSR2 pass.
+ rd->free_rid(device.point_clamp_sampler);
+ rd->free_rid(device.linear_clamp_sampler);
+ for (uint32_t pass_index = 0; pass_index < FFX_FSR2_PASS_COUNT; pass_index++) {
+ device.passes[pass_index].shader->version_free(device.passes[pass_index].shader_version);
+ }
+}
+
+FSR2Context *FSR2Effect::create_context(Size2i p_internal_size, Size2i p_target_size, bool p_autogen_reactive) {
+ // Allocates a context with maxRenderSize = p_internal_size and displaySize = p_target_size; returns nullptr on failure.
+ // NOTE(review): p_autogen_reactive is not read anywhere in this function.
+ FSR2Context *new_context = memnew(RendererRD::FSR2Context);
+ FfxFsr2ContextDescription &desc = new_context->fsr_desc;
+ desc.flags = FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE | FFX_FSR2_ENABLE_DEPTH_INVERTED;
+ desc.maxRenderSize.width = p_internal_size.x;
+ desc.maxRenderSize.height = p_internal_size.y;
+ desc.displaySize.width = p_target_size.x;
+ desc.displaySize.height = p_target_size.y;
+ FFXCommon::create_ffx_interface(&desc.backendInterface, &new_context->scratch, &device);
+ if (ffxFsr2ContextCreate(&new_context->fsr_context, &desc) != FFX_OK) {
+ memdelete(new_context);
+ return nullptr;
+ }
+ return new_context;
+}
+
+void FSR2Effect::upscale(const Parameters &p_params) { // Fills an FfxFsr2DispatchDescription from p_params and dispatches it on p_params.context.
+ FfxFsr2DispatchDescription dispatch_desc = {};
+ RID color = p_params.color; // Local RID copies: their addresses are handed to FFXCommon::get_resource_rd() below.
+ RID depth = p_params.depth;
+ RID velocity = p_params.velocity;
+ RID reactive = p_params.reactive;
+ RID exposure = p_params.exposure;
+ RID output = p_params.output;
+ dispatch_desc.commandList = nullptr;
+ dispatch_desc.color = FFXCommon::get_resource_rd(&color, L"color");
+ dispatch_desc.depth = FFXCommon::get_resource_rd(&depth, L"depth");
+ dispatch_desc.motionVectors = FFXCommon::get_resource_rd(&velocity, L"velocity");
+
+ // Optional pass of auto-generating reactive masks from opaque-only color.
+ // This may reduce flickering in scenarios where there are massive transparent objects.
+ RID opaque_only = p_params.opaque_only;
+ bool autogen_masks = opaque_only.is_valid(); // A valid opaque_only RID is what switches auto-generation on.
+
+ dispatch_desc.enableAutoReactive = autogen_masks;
+ if (autogen_masks) {
+ dispatch_desc.autoTcThreshold = .2f;
+ dispatch_desc.autoTcScale = 1.0f;
+ dispatch_desc.autoReactiveScale = 1.0f;
+ dispatch_desc.autoReactiveMax = 0.9f;
+ dispatch_desc.colorOpaqueOnly = FFXCommon::get_resource_rd(&opaque_only, L"opaque_only");
+ dispatch_desc.reactive = {}; // p_params.reactive is not bound on this path.
+ } else {
+ dispatch_desc.reactive = FFXCommon::get_resource_rd(&reactive, L"reactive");
+ }
+
+ dispatch_desc.exposure = FFXCommon::get_resource_rd(&exposure, L"exposure");
+ dispatch_desc.transparencyAndComposition = {};
+ dispatch_desc.output = FFXCommon::get_resource_rd(&output, L"output");
+ dispatch_desc.jitterOffset.x = p_params.jitter.x;
+ dispatch_desc.jitterOffset.y = p_params.jitter.y;
+ dispatch_desc.motionVectorScale.x = float(p_params.internal_size.width); // The motion vector scale is the internal render size.
+ dispatch_desc.motionVectorScale.y = float(p_params.internal_size.height);
+ dispatch_desc.reset = p_params.reset_accumulation;
+ dispatch_desc.renderSize.width = p_params.internal_size.width;
+ dispatch_desc.renderSize.height = p_params.internal_size.height;
+ dispatch_desc.enableSharpening = (p_params.sharpness > 1e-6f); // Sharpening is off when sharpness is at or below 1e-6.
+ dispatch_desc.sharpness = p_params.sharpness;
+ dispatch_desc.frameTimeDelta = p_params.delta_time;
+ dispatch_desc.preExposure = 1.0f;
+ dispatch_desc.cameraNear = p_params.z_near;
+ dispatch_desc.cameraFar = p_params.z_far;
+ dispatch_desc.cameraFovAngleVertical = p_params.fovy;
+ dispatch_desc.viewSpaceToMetersFactor = 1.0f;
+
+ MaterialStorage::store_camera(p_params.reprojection, dispatch_desc.reprojectionMatrix);
+
+ FfxErrorCode result = ffxFsr2ContextDispatch(&p_params.context->fsr_context, &dispatch_desc); // p_params.context is dereferenced without a null check.
+ ERR_FAIL_COND(result != FFX_OK);
+}
diff --git a/servers/rendering/renderer_rd/effects/ffx/fsr2.h b/servers/rendering/renderer_rd/effects/ffx/fsr2.h
new file mode 100644
index 000000000000..986403cf253b
--- /dev/null
+++ b/servers/rendering/renderer_rd/effects/ffx/fsr2.h
@@ -0,0 +1,97 @@
+/**************************************************************************/
+/* fsr2.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#pragma once
+
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_accumulate_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_autogen_reactive_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_compute_luminance_pyramid_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_depth_clip_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_lock_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_rcas_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_reconstruct_previous_depth_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_tcr_autogen_pass.glsl.gen.h"
+#include "servers/rendering/rendering_server.h"
+
+#include "ffx_common.h"
+#include "thirdparty/amd-ffx/ffx_fsr2.h"
+
+namespace RendererRD {
+class FSR2Context { // State belonging to one FSR2 context created by FSR2Effect::create_context().
+public:
+ FFXCommon::Scratch scratch; // Passed to FFXCommon::create_ffx_interface() when the context is created.
+ FfxFsr2Context fsr_context; // Created with ffxFsr2ContextCreate() and destroyed in ~FSR2Context().
+ FfxFsr2ContextDescription fsr_desc; // Flags, sizes and backend interface filled in by FSR2Effect::create_context().
+
+ ~FSR2Context();
+};
+
+class FSR2Effect { // Holds the FSR2 pass shaders and the FFXCommon::Device; creates contexts and dispatches upscales.
+public:
+ struct Parameters { // Per-dispatch inputs consumed by upscale().
+ FSR2Context *context = nullptr; // Must be set before upscale(): it is dereferenced there without a null check.
+ Size2i internal_size; // Used as both renderSize and motionVectorScale.
+ RID color;
+ RID depth;
+ RID velocity;
+ RID reactive; // Only bound when opaque_only is not a valid RID.
+ RID opaque_only; // When valid, upscale() enables auto reactive generation from it.
+ RID exposure;
+ RID output;
+ float z_near = 0.0f;
+ float z_far = 0.0f;
+ float fovy = 0.0f; // Forwarded as cameraFovAngleVertical.
+ Vector2 jitter;
+ float delta_time = 0.0f; // Forwarded as frameTimeDelta.
+ float sharpness = 0.0f; // Sharpening is enabled only when this exceeds 1e-6.
+ bool reset_accumulation = false;
+ Projection reprojection;
+ };
+
+ FSR2Effect();
+ ~FSR2Effect();
+ FSR2Context *create_context(Size2i p_internal_size, Size2i p_target_size, bool p_autogen_reactive); // Returns nullptr when context creation fails.
+ void upscale(const Parameters &p_params);
+
+private:
+ struct { // One shader object per FSR2 pass; each is initialized in the FSR2Effect constructor.
+ Fsr2DepthClipPassShaderRD depth_clip;
+ Fsr2ReconstructPreviousDepthPassShaderRD reconstruct_previous_depth;
+ Fsr2LockPassShaderRD lock;
+ Fsr2AccumulatePassShaderRD accumulate;
+ Fsr2RcasPassShaderRD rcas;
+ Fsr2ComputeLuminancePyramidPassShaderRD compute_luminance_pyramid;
+ Fsr2AutogenReactivePassShaderRD autogen_reactive;
+ Fsr2TcrAutogenPassShaderRD tcr_autogen;
+ } shaders;
+
+ FFXCommon::Device device;
+};
+} //namespace RendererRD
diff --git a/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.cpp b/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.cpp
new file mode 100644
index 000000000000..ca0470ee6dc4
--- /dev/null
+++ b/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.cpp
@@ -0,0 +1,519 @@
+/**************************************************************************/
+/* fsr3_upscaler.cpp */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#include "fsr3_upscaler.h"
+
+#include "servers/rendering/renderer_rd/storage_rd/material_storage.h"
+#include "servers/rendering/renderer_rd/uniform_set_cache_rd.h"
+
+using namespace RendererRD;
+
+static void fsr3_recv_message(FfxMsgType type, const wchar_t *message) {
+ // Receives a FidelityFX message and routes it to Godot's logging macros.
+ // The whole body is compiled out unless DEV_ENABLED is defined.
+#ifdef DEV_ENABLED
+ if (type == FFX_MESSAGE_TYPE_ERROR) {
+ ERR_PRINT(message);
+ } else if (type == FFX_MESSAGE_TYPE_WARNING) {
+ WARN_PRINT(message);
+ }
+ // Any other message type is ignored.
+#endif
+}
+
+FSR3UpscalerContext::~FSR3UpscalerContext() {
+ // Teardown order: generated reactive mask RID (if valid), the three backend resources, then the FSR3 upscaler context.
+ if (generated_reactive_mask.is_valid()) {
+ RD::get_singleton()->free_rid(generated_reactive_mask);
+ }
+ auto &backend = fsr_desc.backendInterface;
+ backend.fpDestroyResource(&backend, reconstructed_prev_nearest_depth, -1);
+ backend.fpDestroyResource(&backend, dilated_depth, -1);
+ backend.fpDestroyResource(&backend, dilated_motion_vectors, -1);
+ ffxFsr3UpscalerContextDestroy(&fsr_context);
+}
+
+FSR3UpscalerEffect::FSR3UpscalerEffect() {
+ FfxDeviceCapabilities capabilities = FFXCommon::get_device_capabilities();
+
+ String general_defines =
+ "\n#define FFX_GPU\n"
+ "\n#define FFX_GLSL 1\n"
+ "\n#define FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n"
+ "\n#define FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT 1\n"
+ "\n#define FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH 1\n"
+ "\n#define FFX_FSR3UPSCALER_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n"
+ "\n#define FFX_FSR3UPSCALER_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n";
+
+ Vector modes_single;
+ modes_single.push_back("");
+
+ Vector modes_with_fp16;
+ modes_with_fp16.push_back("");
+ modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
+
+ // Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and
+ // there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL
+ // files included in the FSR3 upscaler and mapping the macro bindings (#define FSR3UPSCALER_BIND_*) to their respective implementation names.
+ //
+ // It is not guaranteed these will remain consistent at all between versions of the FSR3 upscaler, so it'll be necessary to keep these
+ // bindings up to date whenever the library is updated. In such cases, it is very likely the validation layer will throw an
+ // error if the bindings do not match.
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_PREPARE_INPUTS];
+ pass.shader = &shaders.prepare_inputs;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_motion_vectors" },
+ FfxResourceBinding{ 1, 0, 0, L"r_input_depth" },
+ FfxResourceBinding{ 2, 0, 0, L"r_input_color_jittered" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 3, 0, 0, L"rw_dilated_motion_vectors" },
+ FfxResourceBinding{ 4, 0, 0, L"rw_dilated_depth" },
+ FfxResourceBinding{ 5, 0, 0, L"rw_reconstructed_previous_nearest_depth" },
+ FfxResourceBinding{ 6, 0, 0, L"rw_farthest_depth" },
+ FfxResourceBinding{ 7, 0, 0, L"rw_current_luma" },
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 8, 0, 0, L"cbFSR3Upscaler" }
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_LUMA_PYRAMID];
+ pass.shader = &shaders.luma_pyramid;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_current_luma" },
+ FfxResourceBinding{ 1, 0, 0, L"r_farthest_depth" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 2, 0, 0, L"rw_spd_global_atomic" },
+ FfxResourceBinding{ 3, 0, 0, L"rw_frame_info" },
+ FfxResourceBinding{ 4, 0, 0, L"rw_spd_mip0" },
+ FfxResourceBinding{ 5, 0, 0, L"rw_spd_mip1" },
+ FfxResourceBinding{ 6, 0, 0, L"rw_spd_mip2" },
+ FfxResourceBinding{ 7, 0, 0, L"rw_spd_mip3" },
+ FfxResourceBinding{ 8, 0, 0, L"rw_spd_mip4" },
+ FfxResourceBinding{ 9, 0, 0, L"rw_spd_mip5" },
+ FfxResourceBinding{ 10, 0, 0, L"rw_farthest_depth_mip1" },
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 11, 0, 0, L"cbFSR3Upscaler" },
+ FfxResourceBinding{ 12, 0, 0, L"cbSPD" },
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_SHADING_CHANGE_PYRAMID];
+ pass.shader = &shaders.shading_change_pyramid;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_current_luma" },
+ FfxResourceBinding{ 1, 0, 0, L"r_previous_luma" },
+ FfxResourceBinding{ 2, 0, 0, L"r_dilated_motion_vectors" },
+ FfxResourceBinding{ 3, 0, 0, L"r_input_exposure" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 4, 0, 0, L"rw_spd_global_atomic" },
+ FfxResourceBinding{ 5, 0, 0, L"rw_spd_mip0" },
+ FfxResourceBinding{ 6, 0, 0, L"rw_spd_mip1" },
+ FfxResourceBinding{ 7, 0, 0, L"rw_spd_mip2" },
+ FfxResourceBinding{ 8, 0, 0, L"rw_spd_mip3" },
+ FfxResourceBinding{ 9, 0, 0, L"rw_spd_mip4" },
+ FfxResourceBinding{ 10, 0, 0, L"rw_spd_mip5" },
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 11, 0, 0, L"cbFSR3Upscaler" },
+ FfxResourceBinding{ 12, 0, 0, L"cbSPD" },
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_SHADING_CHANGE];
+ pass.shader = &shaders.shading_change;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_spd_mips" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 1, 0, 0, L"rw_shading_change" },
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 2, 0, 0, L"cbFSR3Upscaler" },
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_PREPARE_REACTIVITY];
+ pass.shader = &shaders.prepare_reactivity;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_reconstructed_previous_nearest_depth" },
+ FfxResourceBinding{ 1, 0, 0, L"r_dilated_motion_vectors" },
+ FfxResourceBinding{ 2, 0, 0, L"r_dilated_depth" },
+ FfxResourceBinding{ 3, 0, 0, L"r_reactive_mask" },
+ FfxResourceBinding{ 4, 0, 0, L"r_transparency_and_composition_mask" },
+ FfxResourceBinding{ 5, 0, 0, L"r_accumulation" },
+ FfxResourceBinding{ 6, 0, 0, L"r_shading_change" },
+ FfxResourceBinding{ 7, 0, 0, L"r_current_luma" },
+ FfxResourceBinding{ 8, 0, 0, L"r_input_exposure" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 9, 0, 0, L"rw_dilated_reactive_masks" },
+ FfxResourceBinding{ 10, 0, 0, L"rw_new_locks" },
+ FfxResourceBinding{ 11, 0, 0, L"rw_accumulation" },
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 12, 0, 0, L"cbFSR3Upscaler" },
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_LUMA_INSTABILITY];
+ pass.shader = &shaders.luma_instability;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_exposure" },
+ FfxResourceBinding{ 1, 0, 0, L"r_dilated_reactive_masks" },
+ FfxResourceBinding{ 2, 0, 0, L"r_dilated_motion_vectors" },
+ FfxResourceBinding{ 3, 0, 0, L"r_frame_info" },
+ FfxResourceBinding{ 4, 0, 0, L"r_luma_history" },
+ FfxResourceBinding{ 5, 0, 0, L"r_farthest_depth_mip1" },
+ FfxResourceBinding{ 6, 0, 0, L"r_current_luma" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 7, 0, 0, L"rw_luma_history" },
+ FfxResourceBinding{ 8, 0, 0, L"rw_luma_instability" },
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 9, 0, 0, L"cbFSR3Upscaler" },
+ };
+ }
+
+ {
+ Vector accumulate_modes_with_fp16;
+ accumulate_modes_with_fp16.push_back("\n");
+ accumulate_modes_with_fp16.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
+ accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
+ accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
+
+ // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
+ const bool fp16_path_supported = RD::get_singleton()->get_device_vendor_name() != "NVIDIA";
+ FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_ACCUMULATE];
+ pass.shader = &shaders.accumulate;
+ pass.shader->initialize(accumulate_modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported && fp16_path_supported ? 2 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_exposure" },
+ FfxResourceBinding{ 1, 0, 0, L"r_dilated_reactive_masks" },
+ FfxResourceBinding{ 2, 0, 0, L"r_input_motion_vectors" },
+ FfxResourceBinding{ 3, 0, 0, L"r_internal_upscaled_color" },
+ FfxResourceBinding{ 4, 0, 0, L"r_lanczos_lut" },
+ FfxResourceBinding{ 5, 0, 0, L"r_farthest_depth_mip1" },
+ FfxResourceBinding{ 6, 0, 0, L"r_current_luma" },
+ FfxResourceBinding{ 7, 0, 0, L"r_luma_instability" },
+ FfxResourceBinding{ 8, 0, 0, L"r_input_color_jittered" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 9, 0, 0, L"rw_internal_upscaled_color" },
+ FfxResourceBinding{ 10, 0, 0, L"rw_upscaled_output" },
+ FfxResourceBinding{ 11, 0, 0, L"rw_new_locks" },
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 12, 0, 0, L"cbFSR3Upscaler" },
+ };
+
+ // Sharpen pass is a clone of the accumulate pass with the sharpening variant.
+ FFXCommon::Pass &sharpen_pass = device.passes[FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN];
+ sharpen_pass = pass;
+ sharpen_pass.shader_variant = pass.shader_variant + 1;
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_RCAS];
+ pass.shader = &shaders.rcas;
+ pass.shader->initialize(modes_single, general_defines);
+ pass.shader_version = pass.shader->version_create();
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_exposure" },
+ FfxResourceBinding{ 1, 0, 0, L"r_rcas_input" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 2, 0, 0, L"rw_upscaled_output" },
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 3, 0, 0, L"cbFSR3Upscaler" },
+ FfxResourceBinding{ 4, 0, 0, L"cbRCAS" },
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_DEBUG_VIEW];
+ pass.shader = &shaders.debug_view;
+ pass.shader->initialize(modes_single, general_defines);
+ pass.shader_version = pass.shader->version_create();
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_dilated_reactive_masks" },
+ FfxResourceBinding{ 1, 0, 0, L"r_dilated_motion_vectors" },
+ FfxResourceBinding{ 2, 0, 0, L"r_dilated_depth" },
+ FfxResourceBinding{ 3, 0, 0, L"r_internal_upscaled_color" },
+ FfxResourceBinding{ 4, 0, 0, L"r_input_exposure" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 5, 0, 0, L"rw_upscaled_output" },
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 6, 0, 0, L"cbFSR3Upscaler" },
+ };
+ }
+
+ {
+ FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_GENERATE_REACTIVE];
+ pass.shader = &shaders.autogen_reactive;
+ pass.shader->initialize(modes_with_fp16, general_defines);
+ pass.shader_version = pass.shader->version_create();
+ pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
+
+ pass.sampled_texture_bindings = {
+ FfxResourceBinding{ 0, 0, 0, L"r_input_opaque_only" },
+ FfxResourceBinding{ 1, 0, 0, L"r_input_color_jittered" },
+ };
+
+ pass.storage_texture_bindings = {
+ FfxResourceBinding{ 2, 0, 0, L"rw_output_autoreactive" },
+ // Although this binding is present in the GLSL source, the FSR3 C++ side never registers it,
+ // so it must stay commented out to avoid runtime errors.
+ // FfxResourceBinding{ 3, 0, 0, L"rw_output_autocomposition" },
+ };
+
+ pass.uniform_bindings = {
+ FfxResourceBinding{ 4, 0, 0, L"cbFSR3Upscaler" },
+ FfxResourceBinding{ 5, 0, 0, L"cbGenerateReactive" },
+ };
+ }
+
+ device.linear_clamp_sampler = FFXCommon::create_clamp_sampler(RD::SAMPLER_FILTER_LINEAR);
+ device.point_clamp_sampler = FFXCommon::create_clamp_sampler(RD::SAMPLER_FILTER_NEAREST);
+}
+
+FSR3UpscalerEffect::~FSR3UpscalerEffect() {
+	// Release the two clamp samplers (point first, then linear).
+	RD *rd = RD::get_singleton();
+	rd->free_rid(device.point_clamp_sampler);
+	rd->free_rid(device.linear_clamp_sampler);
+
+	// Free the shader version of every pass except TCR autogenerate, which is never created.
+	for (uint32_t pass_index = 0; pass_index < FFX_FSR3UPSCALER_PASS_COUNT; pass_index++) {
+		if (pass_index != FFX_FSR3UPSCALER_PASS_TCR_AUTOGENERATE) {
+			device.passes[pass_index].shader->version_free(device.passes[pass_index].shader_version);
+		}
+	}
+}
+
+// Creates an FSR3 upscaler context for the given sizes; returns nullptr (after deleting the partial context) on any failure.
+FSR3UpscalerContext *FSR3UpscalerEffect::create_context(Size2i p_internal_size, Size2i p_target_size, bool p_autogen_reactive) {
+	FSR3UpscalerContext *context = memnew(RendererRD::FSR3UpscalerContext);
+	context->fsr_desc.flags = FFX_FSR3UPSCALER_ENABLE_HIGH_DYNAMIC_RANGE | FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED;
+#ifdef DEV_ENABLED
+	context->fsr_desc.flags |= FFX_FSR3UPSCALER_ENABLE_DEBUG_CHECKING;
+#endif
+	context->fsr_desc.maxRenderSize.width = p_internal_size.x;
+	context->fsr_desc.maxRenderSize.height = p_internal_size.y;
+	context->fsr_desc.maxUpscaleSize.width = p_target_size.x;
+	context->fsr_desc.maxUpscaleSize.height = p_target_size.y;
+	context->fsr_desc.fpMessage = fsr3_recv_message;
+
+	FFXCommon::create_ffx_interface(&context->fsr_desc.backendInterface, &context->scratch, &device);
+	if (ffxFsr3UpscalerContextCreate(&context->fsr_context, &context->fsr_desc) != FFX_OK) {
+		memdelete(context);
+		return nullptr;
+	}
+
+	// Create shared resources. Each failure path deletes the context and names the resource that actually failed.
+	FfxFsr3UpscalerSharedResourceDescriptions shared_resource_descriptions;
+	ffxFsr3UpscalerGetSharedResourceDescriptions(&context->fsr_context, &shared_resource_descriptions);
+	if (context->fsr_desc.backendInterface.fpCreateResource(&context->fsr_desc.backendInterface, &shared_resource_descriptions.reconstructedPrevNearestDepth, -1, &context->reconstructed_prev_nearest_depth) != FFX_OK) {
+		ERR_PRINT("Failed to create FSR3 Upscaler shared resource: reconstructed_prev_nearest_depth.");
+		memdelete(context);
+		return nullptr;
+	}
+
+	if (context->fsr_desc.backendInterface.fpCreateResource(&context->fsr_desc.backendInterface, &shared_resource_descriptions.dilatedDepth, -1, &context->dilated_depth) != FFX_OK) {
+		ERR_PRINT("Failed to create FSR3 Upscaler shared resource: dilated_depth.");
+		memdelete(context);
+		return nullptr;
+	}
+
+	if (context->fsr_desc.backendInterface.fpCreateResource(&context->fsr_desc.backendInterface, &shared_resource_descriptions.dilatedMotionVectors, -1, &context->dilated_motion_vectors) != FFX_OK) {
+		ERR_PRINT("Failed to create FSR3 Upscaler shared resource: dilated_motion_vectors.");
+		memdelete(context);
+		return nullptr;
+	}
+
+	if (p_autogen_reactive) {
+		// Single-channel texture at internal resolution that receives the auto-generated reactive mask.
+		RD::TextureFormat texture_format;
+		texture_format.texture_type = RD::TEXTURE_TYPE_2D;
+		texture_format.format = RD::DATA_FORMAT_R8_UNORM;
+		texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
+		texture_format.width = p_internal_size.width;
+		texture_format.height = p_internal_size.height;
+		texture_format.depth = 1;
+		texture_format.mipmaps = 1;
+		context->generated_reactive_mask = RD::get_singleton()->texture_create(texture_format, RD::TextureView());
+		if (context->generated_reactive_mask.is_null()) {
+			ERR_PRINT("Failed to create FSR3 Upscaler generated reactive mask texture.");
+			memdelete(context); // Previously leaked: the early return skipped the context cleanup.
+			return nullptr;
+		}
+		RD::get_singleton()->set_resource_name(context->generated_reactive_mask, L"FSR3UPSCALER_GeneratedReactiveMask");
+	}
+
+	return context;
+}
+
+// Upscales one frame with FSR3. A reactive mask is auto-generated first when an opaque-only color and a mask texture are both available.
+void FSR3UpscalerEffect::upscale(const Parameters &p_params) {
+	ERR_FAIL_NULL(p_params.context); // create_context() returns nullptr on failure, so never dereference blindly.
+	FSR3UpscalerContext *context = p_params.context;
+	FFXCommon::Scratch &scratch = context->scratch;
+
+	// Mutable local copies: get_resource_rd() is handed the address of each RID.
+	RID color = p_params.color;
+	RID depth = p_params.depth;
+	RID velocity = p_params.velocity;
+	RID reactive = p_params.reactive;
+	RID exposure = p_params.exposure;
+	RID output = p_params.output;
+	RID opaque_only = p_params.opaque_only;
+	RID out_reactive = context->generated_reactive_mask;
+
+	// Optional pass of auto-generating reactive masks from opaque-only color.
+	// This may reduce flickering in scenarios where there are massive transparent objects.
+	bool autogen_masks = opaque_only.is_valid() && out_reactive.is_valid();
+	if (autogen_masks) {
+		FfxFsr3UpscalerGenerateReactiveDescription generate_desc = {};
+		generate_desc.commandList = nullptr;
+		generate_desc.colorPreUpscale = FFXCommon::get_resource_rd(&color, L"color");
+		generate_desc.colorOpaqueOnly = FFXCommon::get_resource_rd(&opaque_only, L"opaque_only");
+		generate_desc.outReactive = FFXCommon::get_resource_rd(&out_reactive, L"generated_reactive_mask");
+		generate_desc.binaryValue = 0.9f;
+		generate_desc.renderSize.width = p_params.internal_size.width;
+		generate_desc.renderSize.height = p_params.internal_size.height;
+		generate_desc.cutoffThreshold = 0.2f;
+		generate_desc.scale = 1.f;
+		if (ffxFsr3UpscalerContextGenerateReactiveMask(&context->fsr_context, &generate_desc) != FFX_OK) {
+			WARN_PRINT_ONCE("FSR3: Generate reactive mask enabled, but corresponding pass failed.");
+			autogen_masks = false;
+		}
+	}
+
+	// Resolve the context's shared resources to RIDs through the scratch resource storage.
+	RID reconstructed_prev_nearest_depth = scratch.resources.rids[context->reconstructed_prev_nearest_depth.internalIndex];
+	RID dilated_depth = scratch.resources.rids[context->dilated_depth.internalIndex];
+	RID dilated_motion_vectors = scratch.resources.rids[context->dilated_motion_vectors.internalIndex];
+
+	FfxFsr3UpscalerDispatchDescription dispatch_desc = {};
+	dispatch_desc.commandList = nullptr;
+	dispatch_desc.color = FFXCommon::get_resource_rd(&color, L"color");
+	dispatch_desc.depth = FFXCommon::get_resource_rd(&depth, L"depth");
+	dispatch_desc.reconstructedPrevNearestDepth = FFXCommon::get_resource_rd(&reconstructed_prev_nearest_depth, L"reconstructed_prev_nearest_depth");
+	dispatch_desc.dilatedDepth = FFXCommon::get_resource_rd(&dilated_depth, L"dilated_depth");
+	dispatch_desc.dilatedMotionVectors = FFXCommon::get_resource_rd(&dilated_motion_vectors, L"dilated_motion_vectors");
+	dispatch_desc.motionVectors = FFXCommon::get_resource_rd(&velocity, L"velocity");
+	dispatch_desc.reactive = FFXCommon::get_resource_rd(autogen_masks ? &out_reactive : &reactive, L"reactive");
+	dispatch_desc.exposure = FFXCommon::get_resource_rd(&exposure, L"exposure");
+	dispatch_desc.transparencyAndComposition = {};
+	dispatch_desc.output = FFXCommon::get_resource_rd(&output, L"output");
+	dispatch_desc.jitterOffset.x = p_params.jitter.x;
+	dispatch_desc.jitterOffset.y = p_params.jitter.y;
+	dispatch_desc.motionVectorScale.x = float(p_params.internal_size.width);
+	dispatch_desc.motionVectorScale.y = float(p_params.internal_size.height);
+	dispatch_desc.reset = p_params.reset_accumulation;
+	dispatch_desc.renderSize.width = p_params.internal_size.width;
+	dispatch_desc.renderSize.height = p_params.internal_size.height;
+	dispatch_desc.upscaleSize.width = p_params.target_size.width;
+	dispatch_desc.upscaleSize.height = p_params.target_size.height;
+	dispatch_desc.enableSharpening = (p_params.sharpness > 1e-6f);
+	dispatch_desc.sharpness = p_params.sharpness;
+	dispatch_desc.frameTimeDelta = p_params.delta_time;
+	dispatch_desc.preExposure = 1.0f;
+	dispatch_desc.cameraNear = p_params.z_near;
+	dispatch_desc.cameraFar = p_params.z_far;
+	dispatch_desc.cameraFovAngleVertical = p_params.fovy;
+	dispatch_desc.viewSpaceToMetersFactor = 1.0f;
+	MaterialStorage::store_camera(p_params.reprojection, dispatch_desc.reprojectionMatrix);
+
+	FfxErrorCode result = ffxFsr3UpscalerContextDispatch(&context->fsr_context, &dispatch_desc);
+	ERR_FAIL_COND(result != FFX_OK);
+}
diff --git a/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.h b/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.h
new file mode 100644
index 000000000000..311af4ee1a0f
--- /dev/null
+++ b/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.h
@@ -0,0 +1,110 @@
+/**************************************************************************/
+/* fsr3_upscaler.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#pragma once
+
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_accumulate_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_autogen_reactive_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_debug_view_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_instability_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_pyramid_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_inputs_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_reactivity_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_rcas_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pass.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pyramid_pass.glsl.gen.h"
+
+#include "ffx_common.h"
+#include "servers/rendering/rendering_server.h"
+
+#include "thirdparty/amd-ffx/ffx_fsr3upscaler.h"
+
+namespace RendererRD {
+class FSR3UpscalerContext {
+public:
+ FFXCommon::Scratch scratch; // Handed to the FFX backend interface; upscale() resolves shared resource RIDs through it.
+ FfxFsr3UpscalerContext fsr_context; // Filled in by ffxFsr3UpscalerContextCreate() in create_context().
+ FfxFsr3UpscalerContextDescription fsr_desc; // Creation parameters: flags, sizes, message callback and backend interface.
+
+ // Output resources from FSR3 Upscaler that are required for frame generation.
+ FfxResourceInternal reconstructed_prev_nearest_depth;
+ FfxResourceInternal dilated_depth;
+ FfxResourceInternal dilated_motion_vectors;
+ // Only created when the context is requested with auto-generated reactive masks; a null RID otherwise.
+ RID generated_reactive_mask = RID();
+
+ ~FSR3UpscalerContext();
+};
+
+class FSR3UpscalerEffect {
+public:
+	struct Parameters { // Per-call inputs consumed by upscale().
+		FSR3UpscalerContext *context = nullptr; // Must point to a context obtained from create_context().
+		Size2i internal_size;
+		Size2i target_size;
+		RID color;
+		RID depth;
+		RID velocity;
+		RID reactive; // Used as the reactive mask unless one is auto-generated.
+		RID opaque_only; // When valid (and the context has a mask texture), a reactive mask is auto-generated.
+		RID exposure;
+		RID output;
+		float z_near = 0.0f;
+		float z_far = 0.0f;
+		float fovy = 0.0f;
+		Vector2 jitter;
+		float delta_time = 0.0f;
+		float sharpness = 0.0f; // Sharpening is enabled only for values above 1e-6.
+		bool reset_accumulation = false;
+		Projection reprojection;
+	};
+
+	FSR3UpscalerEffect();
+	~FSR3UpscalerEffect();
+	FSR3UpscalerContext *create_context(Size2i p_internal_size, Size2i p_target_size, bool p_autogen_reactive); // Returns nullptr on failure.
+	void upscale(const Parameters &p_params);
+
+private:
+	struct {
+		Fsr3UpscalerPrepareInputsPassShaderRD prepare_inputs;
+		Fsr3UpscalerLumaPyramidPassShaderRD luma_pyramid;
+		Fsr3UpscalerShadingChangePyramidPassShaderRD shading_change_pyramid;
+		Fsr3UpscalerShadingChangePassShaderRD shading_change;
+		Fsr3UpscalerPrepareReactivityPassShaderRD prepare_reactivity;
+		Fsr3UpscalerLumaInstabilityPassShaderRD luma_instability;
+		Fsr3UpscalerAccumulatePassShaderRD accumulate;
+		Fsr3UpscalerRcasPassShaderRD rcas;
+		Fsr3UpscalerDebugViewPassShaderRD debug_view;
+		Fsr3UpscalerAutogenReactivePassShaderRD autogen_reactive;
+	} shaders; // Generated shader objects for the FSR3 upscaler passes.
+
+	FFXCommon::Device device;
+};
+} //namespace RendererRD
diff --git a/servers/rendering/renderer_rd/effects/fsr.cpp b/servers/rendering/renderer_rd/effects/fsr.cpp
deleted file mode 100644
index 1c0b5f7c14b3..000000000000
--- a/servers/rendering/renderer_rd/effects/fsr.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/**************************************************************************/
-/* fsr.cpp */
-/**************************************************************************/
-/* This file is part of: */
-/* GODOT ENGINE */
-/* https://godotengine.org */
-/**************************************************************************/
-/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
-/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
-/* */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the */
-/* "Software"), to deal in the Software without restriction, including */
-/* without limitation the rights to use, copy, modify, merge, publish, */
-/* distribute, sublicense, and/or sell copies of the Software, and to */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions: */
-/* */
-/* The above copyright notice and this permission notice shall be */
-/* included in all copies or substantial portions of the Software. */
-/* */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-/**************************************************************************/
-
-#include "fsr.h"
-#include "../storage_rd/material_storage.h"
-#include "../uniform_set_cache_rd.h"
-
-using namespace RendererRD;
-
-FSR::FSR() {
- Vector fsr_upscale_modes;
- fsr_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_NORMAL\n");
- fsr_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_FALLBACK\n");
- fsr_shader.initialize(fsr_upscale_modes);
-
- FSRShaderVariant variant;
- if (RD::get_singleton()->has_feature(RD::SUPPORTS_HALF_FLOAT)) {
- variant = FSR_SHADER_VARIANT_NORMAL;
- } else {
- variant = FSR_SHADER_VARIANT_FALLBACK;
- }
-
- shader_version = fsr_shader.version_create();
- pipeline.create_compute_pipeline(fsr_shader.version_get_shader(shader_version, variant));
-}
-
-FSR::~FSR() {
- pipeline.free();
- fsr_shader.version_free(shader_version);
-}
-
-void FSR::process(Ref p_render_buffers, RID p_source_rd_texture, RID p_destination_texture) {
- UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
- ERR_FAIL_NULL(uniform_set_cache);
- MaterialStorage *material_storage = MaterialStorage::get_singleton();
- ERR_FAIL_NULL(material_storage);
-
- Size2i internal_size = p_render_buffers->get_internal_size();
- Size2i target_size = p_render_buffers->get_target_size();
- float fsr_upscale_sharpness = p_render_buffers->get_fsr_sharpness();
-
- if (!p_render_buffers->has_texture(SNAME("FSR"), SNAME("upscale_texture"))) {
- RD::DataFormat format = p_render_buffers->get_base_data_format();
- uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
- uint32_t layers = 1; // we only need one layer, in multiview we're processing one layer at a time.
-
- p_render_buffers->create_texture(SNAME("FSR"), SNAME("upscale_texture"), format, usage_bits, RD::TEXTURE_SAMPLES_1, target_size, layers);
- }
-
- RID upscale_texture = p_render_buffers->get_texture(SNAME("FSR"), SNAME("upscale_texture"));
-
- FSRUpscalePushConstant push_constant;
- memset(&push_constant, 0, sizeof(FSRUpscalePushConstant));
-
- int dispatch_x = (target_size.x + 15) / 16;
- int dispatch_y = (target_size.y + 15) / 16;
-
- RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
- RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, pipeline.get_rid());
-
- push_constant.resolution_width = internal_size.width;
- push_constant.resolution_height = internal_size.height;
- push_constant.upscaled_width = target_size.width;
- push_constant.upscaled_height = target_size.height;
- push_constant.sharpness = fsr_upscale_sharpness;
-
- RID shader = fsr_shader.version_get_shader(shader_version, 0);
- ERR_FAIL_COND(shader.is_null());
-
- RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED);
-
- //FSR Easc
- RD::Uniform u_source_rd_texture(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, { default_sampler, p_source_rd_texture });
- RD::Uniform u_upscale_texture(RD::UNIFORM_TYPE_IMAGE, 0, { upscale_texture });
-
- push_constant.pass = FSR_UPSCALE_PASS_EASU;
- RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_source_rd_texture), 0);
- RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_upscale_texture), 1);
-
- RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(FSRUpscalePushConstant));
-
- RD::get_singleton()->compute_list_dispatch(compute_list, dispatch_x, dispatch_y, 1);
- RD::get_singleton()->compute_list_add_barrier(compute_list);
-
- //FSR Rcas
- RD::Uniform u_upscale_texture_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, { default_sampler, upscale_texture });
- RD::Uniform u_destination_texture(RD::UNIFORM_TYPE_IMAGE, 0, { p_destination_texture });
-
- push_constant.pass = FSR_UPSCALE_PASS_RCAS;
- RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_upscale_texture_with_sampler), 0);
- RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_destination_texture), 1);
-
- RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(FSRUpscalePushConstant));
-
- RD::get_singleton()->compute_list_dispatch(compute_list, dispatch_x, dispatch_y, 1);
-
- RD::get_singleton()->compute_list_end();
-}
diff --git a/servers/rendering/renderer_rd/effects/fsr2.cpp b/servers/rendering/renderer_rd/effects/fsr2.cpp
deleted file mode 100644
index a0a8d8a5b369..000000000000
--- a/servers/rendering/renderer_rd/effects/fsr2.cpp
+++ /dev/null
@@ -1,880 +0,0 @@
-/**************************************************************************/
-/* fsr2.cpp */
-/**************************************************************************/
-/* This file is part of: */
-/* GODOT ENGINE */
-/* https://godotengine.org */
-/**************************************************************************/
-/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
-/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
-/* */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the */
-/* "Software"), to deal in the Software without restriction, including */
-/* without limitation the rights to use, copy, modify, merge, publish, */
-/* distribute, sublicense, and/or sell copies of the Software, and to */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions: */
-/* */
-/* The above copyright notice and this permission notice shall be */
-/* included in all copies or substantial portions of the Software. */
-/* */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-/**************************************************************************/
-
-#include "fsr2.h"
-
-#include "../storage_rd/material_storage.h"
-#include "../uniform_set_cache_rd.h"
-
-using namespace RendererRD;
-
-#ifndef _MSC_VER
-#include
-#define wcscpy_s wcscpy
-#endif
-
-static RD::TextureType ffx_resource_type_to_rd_texture_type(FfxResourceType p_type) {
- switch (p_type) {
- case FFX_RESOURCE_TYPE_TEXTURE1D:
- return RD::TEXTURE_TYPE_1D;
- case FFX_RESOURCE_TYPE_TEXTURE2D:
- return RD::TEXTURE_TYPE_2D;
- case FFX_RESOURCE_TYPE_TEXTURE3D:
- return RD::TEXTURE_TYPE_3D;
- default:
- return RD::TEXTURE_TYPE_MAX;
- }
-}
-
-static FfxResourceType rd_texture_type_to_ffx_resource_type(RD::TextureType p_type) {
- switch (p_type) {
- case RD::TEXTURE_TYPE_1D:
- return FFX_RESOURCE_TYPE_TEXTURE1D;
- case RD::TEXTURE_TYPE_2D:
- return FFX_RESOURCE_TYPE_TEXTURE2D;
- case RD::TEXTURE_TYPE_3D:
- return FFX_RESOURCE_TYPE_TEXTURE3D;
- default:
- return FFX_RESOURCE_TYPE_BUFFER;
- }
-}
-
-static RD::DataFormat ffx_surface_format_to_rd_format(FfxSurfaceFormat p_format) {
- switch (p_format) {
- case FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS:
- return RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
- case FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT:
- return RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
- case FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT:
- return RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
- case FFX_SURFACE_FORMAT_R16G16B16A16_UNORM:
- return RD::DATA_FORMAT_R16G16B16A16_UNORM;
- case FFX_SURFACE_FORMAT_R32G32_FLOAT:
- return RD::DATA_FORMAT_R32G32_SFLOAT;
- case FFX_SURFACE_FORMAT_R32_UINT:
- return RD::DATA_FORMAT_R32_UINT;
- case FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS:
- return RD::DATA_FORMAT_R8G8B8A8_UNORM;
- case FFX_SURFACE_FORMAT_R8G8B8A8_UNORM:
- return RD::DATA_FORMAT_R8G8B8A8_UNORM;
- case FFX_SURFACE_FORMAT_R11G11B10_FLOAT:
- return RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32;
- case FFX_SURFACE_FORMAT_R16G16_FLOAT:
- return RD::DATA_FORMAT_R16G16_SFLOAT;
- case FFX_SURFACE_FORMAT_R16G16_UINT:
- return RD::DATA_FORMAT_R16G16_UINT;
- case FFX_SURFACE_FORMAT_R16_FLOAT:
- return RD::DATA_FORMAT_R16_SFLOAT;
- case FFX_SURFACE_FORMAT_R16_UINT:
- return RD::DATA_FORMAT_R16_UINT;
- case FFX_SURFACE_FORMAT_R16_UNORM:
- return RD::DATA_FORMAT_R16_UNORM;
- case FFX_SURFACE_FORMAT_R16_SNORM:
- return RD::DATA_FORMAT_R16_SNORM;
- case FFX_SURFACE_FORMAT_R8_UNORM:
- return RD::DATA_FORMAT_R8_UNORM;
- case FFX_SURFACE_FORMAT_R8_UINT:
- return RD::DATA_FORMAT_R8_UINT;
- case FFX_SURFACE_FORMAT_R8G8_UNORM:
- return RD::DATA_FORMAT_R8G8_UNORM;
- case FFX_SURFACE_FORMAT_R32_FLOAT:
- return RD::DATA_FORMAT_R32_SFLOAT;
- default:
- return RD::DATA_FORMAT_MAX;
- }
-}
-
-static FfxSurfaceFormat rd_format_to_ffx_surface_format(RD::DataFormat p_format) {
- switch (p_format) {
- case RD::DATA_FORMAT_R32G32B32A32_SFLOAT:
- return FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT;
- case RD::DATA_FORMAT_R16G16B16A16_SFLOAT:
- return FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT;
- case RD::DATA_FORMAT_R16G16B16A16_UNORM:
- return FFX_SURFACE_FORMAT_R16G16B16A16_UNORM;
- case RD::DATA_FORMAT_R32G32_SFLOAT:
- return FFX_SURFACE_FORMAT_R32G32_FLOAT;
- case RD::DATA_FORMAT_R32_UINT:
- return FFX_SURFACE_FORMAT_R32_UINT;
- case RD::DATA_FORMAT_R8G8B8A8_UNORM:
- return FFX_SURFACE_FORMAT_R8G8B8A8_UNORM;
- case RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32:
- return FFX_SURFACE_FORMAT_R11G11B10_FLOAT;
- case RD::DATA_FORMAT_R16G16_SFLOAT:
- return FFX_SURFACE_FORMAT_R16G16_FLOAT;
- case RD::DATA_FORMAT_R16G16_UINT:
- return FFX_SURFACE_FORMAT_R16G16_UINT;
- case RD::DATA_FORMAT_R16_SFLOAT:
- return FFX_SURFACE_FORMAT_R16_FLOAT;
- case RD::DATA_FORMAT_R16_UINT:
- return FFX_SURFACE_FORMAT_R16_UINT;
- case RD::DATA_FORMAT_R16_UNORM:
- return FFX_SURFACE_FORMAT_R16_UNORM;
- case RD::DATA_FORMAT_R16_SNORM:
- return FFX_SURFACE_FORMAT_R16_SNORM;
- case RD::DATA_FORMAT_R8_UNORM:
- return FFX_SURFACE_FORMAT_R8_UNORM;
- case RD::DATA_FORMAT_R8_UINT:
- return FFX_SURFACE_FORMAT_R8_UINT;
- case RD::DATA_FORMAT_R8G8_UNORM:
- return FFX_SURFACE_FORMAT_R8G8_UNORM;
- case RD::DATA_FORMAT_R32_SFLOAT:
- return FFX_SURFACE_FORMAT_R32_FLOAT;
- default:
- return FFX_SURFACE_FORMAT_UNKNOWN;
- }
-}
-
-static uint32_t ffx_usage_to_rd_usage_flags(uint32_t p_flags) {
- uint32_t ret = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
-
- if (p_flags & FFX_RESOURCE_USAGE_RENDERTARGET) {
- ret |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
- }
-
- if (p_flags & FFX_RESOURCE_USAGE_UAV) {
- ret |= RD::TEXTURE_USAGE_STORAGE_BIT;
- ret |= RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT;
- ret |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
- }
-
- return ret;
-}
-
-static FfxErrorCode create_backend_context_rd(FfxFsr2Interface *p_backend_interface, FfxDevice p_device) {
- FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
-
- // Store pointer to the device common to all contexts.
- scratch.device = p_device;
-
- // Create a ring buffer of uniform buffers.
- // FIXME: This could be optimized to be a single memory block if it was possible for RD to create views into a particular memory range of a UBO.
- for (uint32_t i = 0; i < FSR2_UBO_RING_BUFFER_SIZE; i++) {
- scratch.ubo_ring_buffer[i] = RD::get_singleton()->uniform_buffer_create(FFX_MAX_CONST_SIZE * sizeof(uint32_t));
- ERR_FAIL_COND_V(scratch.ubo_ring_buffer[i].is_null(), FFX_ERROR_BACKEND_API_ERROR);
- }
-
- return FFX_OK;
-}
-
-static FfxErrorCode get_device_capabilities_rd(FfxFsr2Interface *p_backend_interface, FfxDeviceCapabilities *p_out_device_capabilities, FfxDevice p_device) {
- FSR2Effect::Device &effect_device = *reinterpret_cast(p_device);
-
- *p_out_device_capabilities = effect_device.capabilities;
-
- return FFX_OK;
-}
-
-static FfxErrorCode destroy_backend_context_rd(FfxFsr2Interface *p_backend_interface) {
- FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
-
- for (uint32_t i = 0; i < FSR2_UBO_RING_BUFFER_SIZE; i++) {
- RD::get_singleton()->free_rid(scratch.ubo_ring_buffer[i]);
- }
-
- return FFX_OK;
-}
-
-static FfxErrorCode create_resource_rd(FfxFsr2Interface *p_backend_interface, const FfxCreateResourceDescription *p_create_resource_description, FfxResourceInternal *p_out_resource) {
- // FSR2's base implementation won't issue a call to create a heap type that isn't just default on its own,
- // so we can safely ignore it as RD does not expose this concept.
- ERR_FAIL_COND_V(p_create_resource_description->heapType != FFX_HEAP_TYPE_DEFAULT, FFX_ERROR_INVALID_ARGUMENT);
-
- RenderingDevice *rd = RD::get_singleton();
- FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
- FfxResourceDescription res_desc = p_create_resource_description->resourceDescription;
-
- // FSR2's base implementation never requests buffer creation.
- ERR_FAIL_COND_V(res_desc.type != FFX_RESOURCE_TYPE_TEXTURE1D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE2D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE3D, FFX_ERROR_INVALID_ARGUMENT);
-
- if (res_desc.mipCount == 0) {
- // Mipmap count must be derived from the resource's dimensions.
- res_desc.mipCount = uint32_t(1 + std::floor(std::log2(MAX(MAX(res_desc.width, res_desc.height), res_desc.depth))));
- }
-
- Vector initial_data;
- if (p_create_resource_description->initDataSize) {
- PackedByteArray byte_array;
- byte_array.resize(p_create_resource_description->initDataSize);
- memcpy(byte_array.ptrw(), p_create_resource_description->initData, p_create_resource_description->initDataSize);
- initial_data.push_back(byte_array);
- }
-
- RD::TextureFormat texture_format;
- texture_format.texture_type = ffx_resource_type_to_rd_texture_type(res_desc.type);
- texture_format.format = ffx_surface_format_to_rd_format(res_desc.format);
- texture_format.usage_bits = ffx_usage_to_rd_usage_flags(p_create_resource_description->usage);
- texture_format.width = res_desc.width;
- texture_format.height = res_desc.height;
- texture_format.depth = res_desc.depth;
- texture_format.mipmaps = res_desc.mipCount;
- texture_format.is_discardable = true;
-
- RID texture = rd->texture_create(texture_format, RD::TextureView(), initial_data);
- ERR_FAIL_COND_V(texture.is_null(), FFX_ERROR_BACKEND_API_ERROR);
-
- rd->set_resource_name(texture, String(p_create_resource_description->name));
-
- // Add the resource to the storage and use the internal index to reference it.
- p_out_resource->internalIndex = scratch.resources.add(texture, false, p_create_resource_description->id, res_desc);
-
- return FFX_OK;
-}
-
-static FfxErrorCode register_resource_rd(FfxFsr2Interface *p_backend_interface, const FfxResource *p_in_resource, FfxResourceInternal *p_out_resource) {
- if (p_in_resource->resource == nullptr) {
- // Null resource case.
- p_out_resource->internalIndex = -1;
- return FFX_OK;
- }
-
- FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
- const RID &rid = *reinterpret_cast(p_in_resource->resource);
- ERR_FAIL_COND_V(rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);
-
- // Add the resource to the storage and use the internal index to reference it.
- p_out_resource->internalIndex = scratch.resources.add(rid, true, FSR2Context::RESOURCE_ID_DYNAMIC, p_in_resource->description);
-
- return FFX_OK;
-}
-
-static FfxErrorCode unregister_resources_rd(FfxFsr2Interface *p_backend_interface) {
- FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
- LocalVector dynamic_list_copy = scratch.resources.dynamic_list;
- for (uint32_t i : dynamic_list_copy) {
- scratch.resources.remove(i);
- }
-
- return FFX_OK;
-}
-
-static FfxResourceDescription get_resource_description_rd(FfxFsr2Interface *p_backend_interface, FfxResourceInternal p_resource) {
- if (p_resource.internalIndex != -1) {
- FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
- return scratch.resources.descriptions[p_resource.internalIndex];
- } else {
- return {};
- }
-}
-
-static FfxErrorCode destroy_resource_rd(FfxFsr2Interface *p_backend_interface, FfxResourceInternal p_resource) {
- if (p_resource.internalIndex != -1) {
- FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
- if (scratch.resources.rids[p_resource.internalIndex].is_valid()) {
- RD::get_singleton()->free_rid(scratch.resources.rids[p_resource.internalIndex]);
- scratch.resources.remove(p_resource.internalIndex);
- }
- }
-
- return FFX_OK;
-}
-
-static FfxErrorCode create_pipeline_rd(FfxFsr2Interface *p_backend_interface, FfxFsr2Pass p_pass, const FfxPipelineDescription *p_pipeline_description, FfxPipelineState *p_out_pipeline) {
- FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
- FSR2Effect::Device &device = *reinterpret_cast(scratch.device);
- FSR2Effect::Pass &effect_pass = device.passes[p_pass];
-
- if (effect_pass.pipeline.pipeline_rid.is_null()) {
- // Create pipeline for the device if it hasn't been created yet.
- effect_pass.root_signature.shader_rid = effect_pass.shader->version_get_shader(effect_pass.shader_version, effect_pass.shader_variant);
- ERR_FAIL_COND_V(effect_pass.root_signature.shader_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR);
-
- effect_pass.pipeline.pipeline_rid = RD::get_singleton()->compute_pipeline_create(effect_pass.root_signature.shader_rid);
- ERR_FAIL_COND_V(effect_pass.pipeline.pipeline_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR);
- }
-
- // While this is not their intended use, we use the pipeline and root signature pointers to store the
- // RIDs to the pipeline and shader that RD needs for the compute pipeline.
- p_out_pipeline->pipeline = reinterpret_cast(&effect_pass.pipeline);
- p_out_pipeline->rootSignature = reinterpret_cast(&effect_pass.root_signature);
-
- p_out_pipeline->srvCount = effect_pass.sampled_bindings.size();
- ERR_FAIL_COND_V(p_out_pipeline->srvCount > FFX_MAX_NUM_SRVS, FFX_ERROR_OUT_OF_RANGE);
- memcpy(p_out_pipeline->srvResourceBindings, effect_pass.sampled_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->srvCount);
-
- p_out_pipeline->uavCount = effect_pass.storage_bindings.size();
- ERR_FAIL_COND_V(p_out_pipeline->uavCount > FFX_MAX_NUM_UAVS, FFX_ERROR_OUT_OF_RANGE);
- memcpy(p_out_pipeline->uavResourceBindings, effect_pass.storage_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->uavCount);
-
- p_out_pipeline->constCount = effect_pass.uniform_bindings.size();
- ERR_FAIL_COND_V(p_out_pipeline->constCount > FFX_MAX_NUM_CONST_BUFFERS, FFX_ERROR_OUT_OF_RANGE);
- memcpy(p_out_pipeline->cbResourceBindings, effect_pass.uniform_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->constCount);
-
- bool low_resolution_mvs = (p_pipeline_description->contextFlags & FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) == 0;
-
- if (p_pass == FFX_FSR2_PASS_ACCUMULATE || p_pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN) {
- // Change the binding for motion vectors in this particular pass if low resolution MVs are used.
- if (low_resolution_mvs) {
- FfxResourceBinding &binding = p_out_pipeline->srvResourceBindings[2];
- wcscpy_s(binding.name, L"r_dilated_motion_vectors");
- }
- }
-
- return FFX_OK;
-}
-
-static FfxErrorCode destroy_pipeline_rd(FfxFsr2Interface *p_backend_interface, FfxPipelineState *p_pipeline) {
- // We don't want to destroy pipelines when the FSR2 API deems it necessary as it'll do so whenever the context is destroyed.
-
- return FFX_OK;
-}
-
-static FfxErrorCode schedule_gpu_job_rd(FfxFsr2Interface *p_backend_interface, const FfxGpuJobDescription *p_job) {
- ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT);
- ERR_FAIL_NULL_V(p_job, FFX_ERROR_INVALID_ARGUMENT);
-
- FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
- scratch.gpu_jobs.push_back(*p_job);
-
- return FFX_OK;
-}
-
-static FfxErrorCode execute_gpu_job_clear_float_rd(FSR2Context::Scratch &p_scratch, const FfxClearFloatJobDescription &p_job) {
- RID resource = p_scratch.resources.rids[p_job.target.internalIndex];
- FfxResourceDescription &desc = p_scratch.resources.descriptions[p_job.target.internalIndex];
-
- ERR_FAIL_COND_V(desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);
-
- Color color(p_job.color[0], p_job.color[1], p_job.color[2], p_job.color[3]);
- RD::get_singleton()->texture_clear(resource, color, 0, desc.mipCount, 0, 1);
-
- return FFX_OK;
-}
-
-static FfxErrorCode execute_gpu_job_copy_rd(FSR2Context::Scratch &p_scratch, const FfxCopyJobDescription &p_job) {
- RID src = p_scratch.resources.rids[p_job.src.internalIndex];
- RID dst = p_scratch.resources.rids[p_job.dst.internalIndex];
- FfxResourceDescription &src_desc = p_scratch.resources.descriptions[p_job.src.internalIndex];
- FfxResourceDescription &dst_desc = p_scratch.resources.descriptions[p_job.dst.internalIndex];
-
- ERR_FAIL_COND_V(src_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);
- ERR_FAIL_COND_V(dst_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);
-
- for (uint32_t mip_level = 0; mip_level < src_desc.mipCount; mip_level++) {
- RD::get_singleton()->texture_copy(src, dst, Vector3(0, 0, 0), Vector3(0, 0, 0), Vector3(src_desc.width, src_desc.height, src_desc.depth), mip_level, mip_level, 0, 0);
- }
-
- return FFX_OK;
-}
-
-static FfxErrorCode execute_gpu_job_compute_rd(FSR2Context::Scratch &p_scratch, const FfxComputeJobDescription &p_job) {
- UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
- ERR_FAIL_NULL_V(uniform_set_cache, FFX_ERROR_BACKEND_API_ERROR);
-
- FSR2Effect::RootSignature &root_signature = *reinterpret_cast(p_job.pipeline.rootSignature);
- ERR_FAIL_COND_V(root_signature.shader_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);
-
- FSR2Effect::Pipeline &backend_pipeline = *reinterpret_cast(p_job.pipeline.pipeline);
- ERR_FAIL_COND_V(backend_pipeline.pipeline_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);
-
- thread_local LocalVector compute_uniforms;
- compute_uniforms.clear();
-
- for (uint32_t i = 0; i < p_job.pipeline.srvCount; i++) {
- RID texture_rid = p_scratch.resources.rids[p_job.srvs[i].internalIndex];
- RD::Uniform texture_uniform(RD::UNIFORM_TYPE_TEXTURE, p_job.pipeline.srvResourceBindings[i].slotIndex, texture_rid);
- compute_uniforms.push_back(texture_uniform);
- }
-
- for (uint32_t i = 0; i < p_job.pipeline.uavCount; i++) {
- RID image_rid = p_scratch.resources.rids[p_job.uavs[i].internalIndex];
- RD::Uniform storage_uniform;
- storage_uniform.uniform_type = RD::UNIFORM_TYPE_IMAGE;
- storage_uniform.binding = p_job.pipeline.uavResourceBindings[i].slotIndex;
-
- if (p_job.uavMip[i] > 0) {
- LocalVector &mip_slice_rids = p_scratch.resources.mip_slice_rids[p_job.uavs[i].internalIndex];
- if (mip_slice_rids.is_empty()) {
- mip_slice_rids.resize(p_scratch.resources.descriptions[p_job.uavs[i].internalIndex].mipCount);
- }
-
- ERR_FAIL_COND_V(p_job.uavMip[i] >= mip_slice_rids.size(), FFX_ERROR_INVALID_ARGUMENT);
-
- if (mip_slice_rids[p_job.uavMip[i]].is_null()) {
- mip_slice_rids[p_job.uavMip[i]] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), image_rid, 0, p_job.uavMip[i]);
- }
-
- ERR_FAIL_COND_V(mip_slice_rids[p_job.uavMip[i]].is_null(), FFX_ERROR_BACKEND_API_ERROR);
-
- storage_uniform.append_id(mip_slice_rids[p_job.uavMip[i]]);
- } else {
- storage_uniform.append_id(image_rid);
- }
-
- compute_uniforms.push_back(storage_uniform);
- }
-
- for (uint32_t i = 0; i < p_job.pipeline.constCount; i++) {
- RID buffer_rid = p_scratch.ubo_ring_buffer[p_scratch.ubo_ring_buffer_index];
- p_scratch.ubo_ring_buffer_index = (p_scratch.ubo_ring_buffer_index + 1) % FSR2_UBO_RING_BUFFER_SIZE;
-
- RD::get_singleton()->buffer_update(buffer_rid, 0, p_job.cbs[i].uint32Size * sizeof(uint32_t), p_job.cbs[i].data);
-
- RD::Uniform buffer_uniform(RD::UNIFORM_TYPE_UNIFORM_BUFFER, p_job.pipeline.cbResourceBindings[i].slotIndex, buffer_rid);
- compute_uniforms.push_back(buffer_uniform);
- }
-
- FSR2Effect::Device &device = *reinterpret_cast(p_scratch.device);
- RD::Uniform u_point_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 0, device.point_clamp_sampler);
- RD::Uniform u_linear_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 1, device.linear_clamp_sampler);
-
- RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
- RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, backend_pipeline.pipeline_rid);
- RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(root_signature.shader_rid, 0, u_point_clamp_sampler, u_linear_clamp_sampler), 0);
- RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache_vec(root_signature.shader_rid, 1, compute_uniforms), 1);
- RD::get_singleton()->compute_list_dispatch(compute_list, p_job.dimensions[0], p_job.dimensions[1], p_job.dimensions[2]);
- RD::get_singleton()->compute_list_end();
-
- return FFX_OK;
-}
-
-static FfxErrorCode execute_gpu_jobs_rd(FfxFsr2Interface *p_backend_interface, FfxCommandList p_command_list) {
- ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT);
-
- FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer);
- FfxErrorCode error_code = FFX_OK;
- for (const FfxGpuJobDescription &job : scratch.gpu_jobs) {
- switch (job.jobType) {
- case FFX_GPU_JOB_CLEAR_FLOAT: {
- error_code = execute_gpu_job_clear_float_rd(scratch, job.clearJobDescriptor);
- } break;
- case FFX_GPU_JOB_COPY: {
- error_code = execute_gpu_job_copy_rd(scratch, job.copyJobDescriptor);
- } break;
- case FFX_GPU_JOB_COMPUTE: {
- error_code = execute_gpu_job_compute_rd(scratch, job.computeJobDescriptor);
- } break;
- default: {
- error_code = FFX_ERROR_INVALID_ARGUMENT;
- } break;
- }
-
- if (error_code != FFX_OK) {
- scratch.gpu_jobs.clear();
- return error_code;
- }
- }
-
- scratch.gpu_jobs.clear();
-
- return FFX_OK;
-}
-
-static FfxResource get_resource_rd(RID *p_rid, const wchar_t *p_name) {
- FfxResource res = {};
- if (p_rid->is_null()) {
- return res;
- }
-
- wcscpy_s(res.name, p_name);
-
- RD::TextureFormat texture_format = RD::get_singleton()->texture_get_format(*p_rid);
- res.description.type = rd_texture_type_to_ffx_resource_type(texture_format.texture_type);
- res.description.format = rd_format_to_ffx_surface_format(texture_format.format);
- res.description.width = texture_format.width;
- res.description.height = texture_format.height;
- res.description.depth = texture_format.depth;
- res.description.mipCount = texture_format.mipmaps;
- res.description.flags = FFX_RESOURCE_FLAGS_NONE;
- res.resource = reinterpret_cast(p_rid);
- res.isDepth = texture_format.usage_bits & RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
-
- return res;
-}
-
-FSR2Context::~FSR2Context() {
- ffxFsr2ContextDestroy(&fsr_context);
-}
-
-FSR2Effect::FSR2Effect() {
- FfxDeviceCapabilities &capabilities = device.capabilities;
- capabilities.minimumSupportedShaderModel = FFX_SHADER_MODEL_5_1;
- capabilities.waveLaneCountMin = 32;
- capabilities.waveLaneCountMax = 32;
- capabilities.fp16Supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_HALF_FLOAT);
- capabilities.raytracingSupported = false;
-
- String general_defines =
- "\n#define FFX_GPU\n"
- "\n#define FFX_GLSL 1\n"
- "\n#define FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n"
- "\n#define FFX_FSR2_OPTION_HDR_COLOR_INPUT 1\n"
- "\n#define FFX_FSR2_OPTION_INVERTED_DEPTH 1\n"
- "\n#define FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n"
- "\n#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n";
-
- Vector modes_single;
- modes_single.push_back("");
-
- Vector modes_with_fp16;
- modes_with_fp16.push_back("");
- modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
-
- // Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and
- // there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL
- // files included in FSR2 and mapping the macro bindings (#define FSR2_BIND_*) to their respective implementation names.
- //
- // It is not guaranteed these will remain consistent at all between versions of FSR2, so it'll be necessary to keep these
- // bindings up to date whenever the library is updated. In such cases, it is very likely the validation layer will throw an
- // error if the bindings do not match.
-
- {
- Pass &pass = device.passes[FFX_FSR2_PASS_DEPTH_CLIP];
- pass.shader = &shaders.depth_clip;
- pass.shader->initialize(modes_with_fp16, general_defines);
- pass.shader_version = pass.shader->version_create();
- pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
-
- pass.sampled_bindings = {
- FfxResourceBinding{ 0, 0, L"r_reconstructed_previous_nearest_depth" },
- FfxResourceBinding{ 1, 0, L"r_dilated_motion_vectors" },
- FfxResourceBinding{ 2, 0, L"r_dilatedDepth" },
- FfxResourceBinding{ 3, 0, L"r_reactive_mask" },
- FfxResourceBinding{ 4, 0, L"r_transparency_and_composition_mask" },
- FfxResourceBinding{ 6, 0, L"r_previous_dilated_motion_vectors" },
- FfxResourceBinding{ 7, 0, L"r_input_motion_vectors" },
- FfxResourceBinding{ 8, 0, L"r_input_color_jittered" },
- FfxResourceBinding{ 9, 0, L"r_input_depth" },
- FfxResourceBinding{ 10, 0, L"r_input_exposure" }
- };
-
- pass.storage_bindings = {
- // FSR2_BIND_UAV_DEPTH_CLIP (11) does not point to anything.
- FfxResourceBinding{ 12, 0, L"rw_dilated_reactive_masks" },
- FfxResourceBinding{ 13, 0, L"rw_prepared_input_color" }
- };
-
- pass.uniform_bindings = {
- FfxResourceBinding{ 14, 0, L"cbFSR2" }
- };
- }
-
- {
- Pass &pass = device.passes[FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH];
- pass.shader = &shaders.reconstruct_previous_depth;
- pass.shader->initialize(modes_with_fp16, general_defines);
- pass.shader_version = pass.shader->version_create();
- pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
-
- pass.sampled_bindings = {
- FfxResourceBinding{ 0, 0, L"r_input_motion_vectors" },
- FfxResourceBinding{ 1, 0, L"r_input_depth" },
- FfxResourceBinding{ 2, 0, L"r_input_color_jittered" },
- FfxResourceBinding{ 3, 0, L"r_input_exposure" },
- FfxResourceBinding{ 4, 0, L"r_luma_history" }
- };
-
- pass.storage_bindings = {
- FfxResourceBinding{ 5, 0, L"rw_reconstructed_previous_nearest_depth" },
- FfxResourceBinding{ 6, 0, L"rw_dilated_motion_vectors" },
- FfxResourceBinding{ 7, 0, L"rw_dilatedDepth" },
- FfxResourceBinding{ 8, 0, L"rw_prepared_input_color" },
- FfxResourceBinding{ 9, 0, L"rw_luma_history" },
- // FSR2_BIND_UAV_LUMA_INSTABILITY (10) does not point to anything.
- FfxResourceBinding{ 11, 0, L"rw_lock_input_luma" }
- };
-
- pass.uniform_bindings = {
- FfxResourceBinding{ 12, 0, L"cbFSR2" }
- };
- }
-
- {
- Pass &pass = device.passes[FFX_FSR2_PASS_LOCK];
- pass.shader = &shaders.lock;
- pass.shader->initialize(modes_with_fp16, general_defines);
- pass.shader_version = pass.shader->version_create();
- pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
-
- pass.sampled_bindings = {
- FfxResourceBinding{ 0, 0, L"r_lock_input_luma" }
- };
-
- pass.storage_bindings = {
- FfxResourceBinding{ 1, 0, L"rw_new_locks" },
- FfxResourceBinding{ 2, 0, L"rw_reconstructed_previous_nearest_depth" }
- };
-
- pass.uniform_bindings = {
- FfxResourceBinding{ 3, 0, L"cbFSR2" }
- };
- }
-
- {
- Vector accumulate_modes_with_fp16;
- accumulate_modes_with_fp16.push_back("\n");
- accumulate_modes_with_fp16.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
- accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
- accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
-
- // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
- const bool fp16_path_supported = RD::get_singleton()->get_device_vendor_name() != "NVIDIA";
- Pass &pass = device.passes[FFX_FSR2_PASS_ACCUMULATE];
- pass.shader = &shaders.accumulate;
- pass.shader->initialize(accumulate_modes_with_fp16, general_defines);
- pass.shader_version = pass.shader->version_create();
- pass.shader_variant = capabilities.fp16Supported && fp16_path_supported ? 2 : 0;
-
- pass.sampled_bindings = {
- FfxResourceBinding{ 0, 0, L"r_input_exposure" },
- FfxResourceBinding{ 1, 0, L"r_dilated_reactive_masks" },
- FfxResourceBinding{ 2, 0, L"r_input_motion_vectors" },
- FfxResourceBinding{ 3, 0, L"r_internal_upscaled_color" },
- FfxResourceBinding{ 4, 0, L"r_lock_status" },
- FfxResourceBinding{ 5, 0, L"r_input_depth" },
- FfxResourceBinding{ 6, 0, L"r_prepared_input_color" },
- // FSR2_BIND_SRV_LUMA_INSTABILITY(7) does not point to anything.
- FfxResourceBinding{ 8, 0, L"r_lanczos_lut" },
- FfxResourceBinding{ 9, 0, L"r_upsample_maximum_bias_lut" },
- FfxResourceBinding{ 10, 0, L"r_imgMips" },
- FfxResourceBinding{ 11, 0, L"r_auto_exposure" },
- FfxResourceBinding{ 12, 0, L"r_luma_history" }
- };
-
- pass.storage_bindings = {
- FfxResourceBinding{ 13, 0, L"rw_internal_upscaled_color" },
- FfxResourceBinding{ 14, 0, L"rw_lock_status" },
- FfxResourceBinding{ 15, 0, L"rw_upscaled_output" },
- FfxResourceBinding{ 16, 0, L"rw_new_locks" },
- FfxResourceBinding{ 17, 0, L"rw_luma_history" }
- };
-
- pass.uniform_bindings = {
- FfxResourceBinding{ 18, 0, L"cbFSR2" }
- };
-
- // Sharpen pass is a clone of the accumulate pass with the sharpening variant.
- Pass &sharpen_pass = device.passes[FFX_FSR2_PASS_ACCUMULATE_SHARPEN];
- sharpen_pass = pass;
- sharpen_pass.shader_variant = pass.shader_variant + 1;
- }
-
- {
- Pass &pass = device.passes[FFX_FSR2_PASS_RCAS];
- pass.shader = &shaders.rcas;
- pass.shader->initialize(modes_single, general_defines);
- pass.shader_version = pass.shader->version_create();
-
- pass.sampled_bindings = {
- FfxResourceBinding{ 0, 0, L"r_input_exposure" },
- FfxResourceBinding{ 1, 0, L"r_rcas_input" }
- };
-
- pass.storage_bindings = {
- FfxResourceBinding{ 2, 0, L"rw_upscaled_output" }
- };
-
- pass.uniform_bindings = {
- FfxResourceBinding{ 3, 0, L"cbFSR2" },
- FfxResourceBinding{ 4, 0, L"cbRCAS" }
- };
- }
-
- {
- Pass &pass = device.passes[FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID];
- pass.shader = &shaders.compute_luminance_pyramid;
- pass.shader->initialize(modes_single, general_defines);
- pass.shader_version = pass.shader->version_create();
-
- pass.sampled_bindings = {
- FfxResourceBinding{ 0, 0, L"r_input_color_jittered" }
- };
-
- pass.storage_bindings = {
- FfxResourceBinding{ 1, 0, L"rw_spd_global_atomic" },
- FfxResourceBinding{ 2, 0, L"rw_img_mip_shading_change" },
- FfxResourceBinding{ 3, 0, L"rw_img_mip_5" },
- FfxResourceBinding{ 4, 0, L"rw_auto_exposure" }
- };
-
- pass.uniform_bindings = {
- FfxResourceBinding{ 5, 0, L"cbFSR2" },
- FfxResourceBinding{ 6, 0, L"cbSPD" }
- };
- }
-
- {
- Pass &pass = device.passes[FFX_FSR2_PASS_GENERATE_REACTIVE];
- pass.shader = &shaders.autogen_reactive;
- pass.shader->initialize(modes_with_fp16, general_defines);
- pass.shader_version = pass.shader->version_create();
- pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
-
- pass.sampled_bindings = {
- FfxResourceBinding{ 0, 0, L"r_input_opaque_only" },
- FfxResourceBinding{ 1, 0, L"r_input_color_jittered" }
- };
-
- pass.storage_bindings = {
- FfxResourceBinding{ 2, 0, L"rw_output_autoreactive" }
- };
-
- pass.uniform_bindings = {
- FfxResourceBinding{ 3, 0, L"cbGenerateReactive" },
- FfxResourceBinding{ 4, 0, L"cbFSR2" }
- };
- }
-
- {
- Pass &pass = device.passes[FFX_FSR2_PASS_TCR_AUTOGENERATE];
- pass.shader = &shaders.tcr_autogen;
- pass.shader->initialize(modes_with_fp16, general_defines);
- pass.shader_version = pass.shader->version_create();
- pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
-
- pass.sampled_bindings = {
- FfxResourceBinding{ 0, 0, L"r_input_opaque_only" },
- FfxResourceBinding{ 1, 0, L"r_input_color_jittered" },
- FfxResourceBinding{ 2, 0, L"r_input_motion_vectors" },
- FfxResourceBinding{ 3, 0, L"r_input_prev_color_pre_alpha" },
- FfxResourceBinding{ 4, 0, L"r_input_prev_color_post_alpha" },
- FfxResourceBinding{ 5, 0, L"r_reactive_mask" },
- FfxResourceBinding{ 6, 0, L"r_transparency_and_composition_mask" },
- FfxResourceBinding{ 13, 0, L"r_input_depth" }
- };
-
- pass.storage_bindings = {
- FfxResourceBinding{ 7, 0, L"rw_output_autoreactive" },
- FfxResourceBinding{ 8, 0, L"rw_output_autocomposition" },
- FfxResourceBinding{ 9, 0, L"rw_output_prev_color_pre_alpha" },
- FfxResourceBinding{ 10, 0, L"rw_output_prev_color_post_alpha" }
- };
-
- pass.uniform_bindings = {
- FfxResourceBinding{ 11, 0, L"cbFSR2" },
- FfxResourceBinding{ 12, 0, L"cbGenerateReactive" }
- };
- }
-
- RD::SamplerState state;
- state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
- state.min_filter = RD::SAMPLER_FILTER_NEAREST;
- state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
- state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
- state.repeat_w = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
- state.min_lod = -1000.0f;
- state.max_lod = 1000.0f;
- state.anisotropy_max = 1.0;
- device.point_clamp_sampler = RD::get_singleton()->sampler_create(state);
- ERR_FAIL_COND(device.point_clamp_sampler.is_null());
-
- state.mag_filter = RD::SAMPLER_FILTER_LINEAR;
- state.min_filter = RD::SAMPLER_FILTER_LINEAR;
- device.linear_clamp_sampler = RD::get_singleton()->sampler_create(state);
- ERR_FAIL_COND(device.linear_clamp_sampler.is_null());
-}
-
-FSR2Effect::~FSR2Effect() {
- RD::get_singleton()->free_rid(device.point_clamp_sampler);
- RD::get_singleton()->free_rid(device.linear_clamp_sampler);
-
- for (uint32_t i = 0; i < FFX_FSR2_PASS_COUNT; i++) {
- device.passes[i].shader->version_free(device.passes[i].shader_version);
- }
-}
-
-FSR2Context *FSR2Effect::create_context(Size2i p_internal_size, Size2i p_target_size) {
- FSR2Context *context = memnew(RendererRD::FSR2Context);
- context->fsr_desc.flags = FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE | FFX_FSR2_ENABLE_DEPTH_INVERTED;
- context->fsr_desc.maxRenderSize.width = p_internal_size.x;
- context->fsr_desc.maxRenderSize.height = p_internal_size.y;
- context->fsr_desc.displaySize.width = p_target_size.x;
- context->fsr_desc.displaySize.height = p_target_size.y;
- context->fsr_desc.device = &device;
-
- FfxFsr2Interface &functions = context->fsr_desc.callbacks;
- functions.fpCreateBackendContext = create_backend_context_rd;
- functions.fpGetDeviceCapabilities = get_device_capabilities_rd;
- functions.fpDestroyBackendContext = destroy_backend_context_rd;
- functions.fpCreateResource = create_resource_rd;
- functions.fpRegisterResource = register_resource_rd;
- functions.fpUnregisterResources = unregister_resources_rd;
- functions.fpGetResourceDescription = get_resource_description_rd;
- functions.fpDestroyResource = destroy_resource_rd;
- functions.fpCreatePipeline = create_pipeline_rd;
- functions.fpDestroyPipeline = destroy_pipeline_rd;
- functions.fpScheduleGpuJob = schedule_gpu_job_rd;
- functions.fpExecuteGpuJobs = execute_gpu_jobs_rd;
- functions.scratchBuffer = &context->scratch;
- functions.scratchBufferSize = sizeof(context->scratch);
-
- FfxErrorCode result = ffxFsr2ContextCreate(&context->fsr_context, &context->fsr_desc);
- if (result == FFX_OK) {
- return context;
- } else {
- memdelete(context);
- return nullptr;
- }
-}
-
-void FSR2Effect::upscale(const Parameters &p_params) {
- // TODO: Transparency & Composition mask is not implemented.
- FfxFsr2DispatchDescription dispatch_desc = {};
- RID color = p_params.color;
- RID depth = p_params.depth;
- RID velocity = p_params.velocity;
- RID reactive = p_params.reactive;
- RID exposure = p_params.exposure;
- RID output = p_params.output;
- dispatch_desc.commandList = nullptr;
- dispatch_desc.color = get_resource_rd(&color, L"color");
- dispatch_desc.depth = get_resource_rd(&depth, L"depth");
- dispatch_desc.motionVectors = get_resource_rd(&velocity, L"velocity");
- dispatch_desc.reactive = get_resource_rd(&reactive, L"reactive");
- dispatch_desc.exposure = get_resource_rd(&exposure, L"exposure");
- dispatch_desc.transparencyAndComposition = {};
- dispatch_desc.output = get_resource_rd(&output, L"output");
- dispatch_desc.colorOpaqueOnly = {};
- dispatch_desc.jitterOffset.x = p_params.jitter.x;
- dispatch_desc.jitterOffset.y = p_params.jitter.y;
- dispatch_desc.motionVectorScale.x = float(p_params.internal_size.width);
- dispatch_desc.motionVectorScale.y = float(p_params.internal_size.height);
- dispatch_desc.reset = p_params.reset_accumulation;
- dispatch_desc.renderSize.width = p_params.internal_size.width;
- dispatch_desc.renderSize.height = p_params.internal_size.height;
- dispatch_desc.enableSharpening = (p_params.sharpness > 1e-6f);
- dispatch_desc.sharpness = p_params.sharpness;
- dispatch_desc.frameTimeDelta = p_params.delta_time;
- dispatch_desc.preExposure = 1.0f;
- dispatch_desc.cameraNear = p_params.z_near;
- dispatch_desc.cameraFar = p_params.z_far;
- dispatch_desc.cameraFovAngleVertical = p_params.fovy;
- dispatch_desc.viewSpaceToMetersFactor = 1.0f;
- dispatch_desc.enableAutoReactive = false;
- dispatch_desc.autoTcThreshold = 1.0f;
- dispatch_desc.autoTcScale = 1.0f;
- dispatch_desc.autoReactiveScale = 1.0f;
- dispatch_desc.autoReactiveMax = 1.0f;
-
- RendererRD::MaterialStorage::store_camera(p_params.reprojection, dispatch_desc.reprojectionMatrix);
-
- FfxErrorCode result = ffxFsr2ContextDispatch(&p_params.context->fsr_context, &dispatch_desc);
- ERR_FAIL_COND(result != FFX_OK);
-}
diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
index 10f96eeb060f..4224f388d2bf 100644
--- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
+++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
@@ -84,7 +84,13 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_voxelgi()
void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_fsr2(RendererRD::FSR2Effect *p_effect) {
if (fsr2_context == nullptr) {
- fsr2_context = p_effect->create_context(render_buffers->get_internal_size(), render_buffers->get_target_size());
+ fsr2_context = p_effect->create_context(render_buffers->get_internal_size(), render_buffers->get_target_size(), render_buffers->get_fsr_auto_generate_reactive());
+ }
+}
+
+void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_fsr3_upscaler(RendererRD::FSR3UpscalerEffect *p_effect) {
+ if (fsr3_upscaler_context == nullptr) {
+ fsr3_upscaler_context = p_effect->create_context(render_buffers->get_internal_size(), render_buffers->get_target_size(), render_buffers->get_fsr_auto_generate_reactive());
}
}
@@ -127,6 +133,11 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::free_data() {
fsr2_context = nullptr;
}
+ if (fsr3_upscaler_context) {
+ memdelete(fsr3_upscaler_context);
+ fsr3_upscaler_context = nullptr;
+ }
+
#ifdef METAL_MFXTEMPORAL_ENABLED
if (mfx_temporal_context) {
memdelete(mfx_temporal_context);
@@ -1738,6 +1749,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
enum {
SCALE_NONE,
SCALE_FSR2,
+ SCALE_FSR3,
SCALE_MFX,
} scale_type = SCALE_NONE;
@@ -1745,6 +1757,9 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
case RS::VIEWPORT_SCALING_3D_MODE_FSR2:
scale_type = SCALE_FSR2;
break;
+ case RS::VIEWPORT_SCALING_3D_MODE_FSR3:
+ scale_type = SCALE_FSR3;
+ break;
case RS::VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL:
#ifdef METAL_MFXTEMPORAL_ENABLED
scale_type = SCALE_MFX;
@@ -2359,6 +2374,15 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
_process_compositor_effects(RS::COMPOSITOR_EFFECT_CALLBACK_TYPE_PRE_TRANSPARENT, p_render_data);
}
+ // Prepare opaque-only texture for reactive mask generation if needed.
+ if (rb->get_fsr_auto_generate_reactive()) {
+ rb->ensure_opaque_only_color_texture();
+ RD::get_singleton()->draw_command_begin_label("Copy Opaque-only Color for FSR");
+ Size2i copy_size = rb->get_internal_size();
+ copy_effects->copy_to_rect(rb->get_internal_texture(), rb->get_opaque_only_color_texture(), Rect2i(0, 0, copy_size.width, copy_size.height));
+ RD::get_singleton()->draw_command_end_label();
+ }
+
RENDER_TIMESTAMP("Render 3D Transparent Pass");
RD::get_singleton()->draw_command_begin_label("Render 3D Transparent Pass");
@@ -2434,6 +2458,11 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
params.depth = rb->get_depth_texture(v);
params.velocity = rb->get_velocity_buffer(false, v);
params.reactive = rb->get_internal_texture_reactive(v);
+ if (rb->get_fsr_auto_generate_reactive()) {
+ // Provide opaque only texture for reactive mask generation.
+ params.opaque_only = rb->get_opaque_only_color_texture(v);
+ }
+
params.exposure = exposure;
params.output = rb->get_upscaled_texture(v);
params.z_near = p_render_data->scene_data->z_near;
@@ -2455,6 +2484,57 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
fsr2_effect->upscale(params);
}
+ RD::get_singleton()->draw_command_end_label();
+ } else if (scale_type == SCALE_FSR3) {
+ rb_data->ensure_fsr3_upscaler(fsr3_upscaler_effect);
+
+ RID exposure;
+ if (RSG::camera_attributes->camera_attributes_uses_auto_exposure(p_render_data->camera_attributes)) {
+ exposure = luminance->get_current_luminance_buffer(rb);
+ }
+
+ RD::get_singleton()->draw_command_begin_label("FSR3 Upscaler");
+ RENDER_TIMESTAMP("FSR3 Upscaler");
+
+ for (uint32_t v = 0; v < rb->get_view_count(); v++) {
+ real_t fov = p_render_data->scene_data->cam_projection.get_fov();
+ real_t aspect = p_render_data->scene_data->cam_projection.get_aspect();
+ real_t fovy = p_render_data->scene_data->cam_projection.get_fovy(fov, 1.0 / aspect);
+ Vector2 jitter = p_render_data->scene_data->taa_jitter * Vector2(rb->get_internal_size()) * 0.5f;
+ RendererRD::FSR3UpscalerEffect::Parameters params;
+ params.context = rb_data->get_fsr3_upscaler_context();
+ params.internal_size = rb->get_internal_size();
+ params.sharpness = CLAMP(1.0f - (rb->get_fsr_sharpness() / 2.0f), 0.0f, 1.0f);
+ params.color = rb->get_internal_texture(v);
+ params.depth = rb->get_depth_texture(v);
+ params.velocity = rb->get_velocity_buffer(false, v);
+ params.reactive = rb->get_internal_texture_reactive(v);
+ if (rb->get_fsr_auto_generate_reactive()) {
+ // Provide opaque only texture for reactive mask generation.
+ params.opaque_only = rb->get_opaque_only_color_texture(v);
+ }
+
+ params.exposure = exposure;
+ params.output = rb->get_upscaled_texture(v);
+ params.z_near = p_render_data->scene_data->z_near;
+ params.z_far = p_render_data->scene_data->z_far;
+ params.fovy = fovy;
+ params.jitter = jitter;
+ params.delta_time = float(time_step);
+ params.reset_accumulation = false; // FIXME: The engine does not provide a way to reset the accumulation.
+
+ Projection correction;
+ correction.set_depth_correction(true, true, false);
+
+ const Projection &prev_proj = p_render_data->scene_data->prev_cam_projection;
+ const Projection &cur_proj = p_render_data->scene_data->cam_projection;
+ const Transform3D &prev_transform = p_render_data->scene_data->prev_cam_transform;
+ const Transform3D &cur_transform = p_render_data->scene_data->cam_transform;
+ params.reprojection = (correction * prev_proj) * prev_transform.affine_inverse() * cur_transform * (correction * cur_proj).inverse();
+
+ fsr3_upscaler_effect->upscale(params);
+ }
+
RD::get_singleton()->draw_command_end_label();
} else if (scale_type == SCALE_MFX) {
#ifdef METAL_MFXTEMPORAL_ENABLED
@@ -5094,6 +5174,7 @@ RenderForwardClustered::RenderForwardClustered() {
taa = memnew(RendererRD::TAA);
fsr2_effect = memnew(RendererRD::FSR2Effect);
+ fsr3_upscaler_effect = memnew(RendererRD::FSR3UpscalerEffect);
ss_effects = memnew(RendererRD::SSEffects);
#ifdef METAL_MFXTEMPORAL_ENABLED
motion_vectors_store = memnew(RendererRD::MotionVectorsStore);
@@ -5117,6 +5198,11 @@ RenderForwardClustered::~RenderForwardClustered() {
fsr2_effect = nullptr;
}
+ if (fsr3_upscaler_effect) {
+ memdelete(fsr3_upscaler_effect);
+ fsr3_upscaler_effect = nullptr;
+ }
+
#ifdef METAL_MFXTEMPORAL_ENABLED
if (mfx_temporal_effect) {
memdelete(mfx_temporal_effect);
diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h
index a5b3199f3123..1924a8119101 100644
--- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h
+++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h
@@ -33,7 +33,8 @@
#include "core/templates/paged_allocator.h"
#include "servers/rendering/multi_uma_buffer.h"
#include "servers/rendering/renderer_rd/cluster_builder_rd.h"
-#include "servers/rendering/renderer_rd/effects/fsr2.h"
+#include "servers/rendering/renderer_rd/effects/ffx/fsr2.h"
+#include "servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.h"
#ifdef METAL_ENABLED
#include "servers/rendering/renderer_rd/effects/metal_fx.h"
#endif
@@ -95,6 +96,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
private:
RenderSceneBuffersRD *render_buffers = nullptr;
RendererRD::FSR2Context *fsr2_context = nullptr;
+ RendererRD::FSR3UpscalerContext *fsr3_upscaler_context = nullptr;
#ifdef METAL_MFXTEMPORAL_ENABLED
RendererRD::MFXTemporalContext *mfx_temporal_context = nullptr;
#endif
@@ -142,7 +144,9 @@ class RenderForwardClustered : public RendererSceneRenderRD {
RID get_voxelgi_msaa(uint32_t p_layer) { return render_buffers->get_texture_slice(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_VOXEL_GI_MSAA, p_layer, 0); }
void ensure_fsr2(RendererRD::FSR2Effect *p_effect);
+ void ensure_fsr3_upscaler(RendererRD::FSR3UpscalerEffect *p_effect);
RendererRD::FSR2Context *get_fsr2_context() const { return fsr2_context; }
+ RendererRD::FSR3UpscalerContext *get_fsr3_upscaler_context() const { return fsr3_upscaler_context; }
#ifdef METAL_MFXTEMPORAL_ENABLED
bool ensure_mfx_temporal(RendererRD::MFXTemporalEffect *p_effect);
@@ -730,6 +734,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
RendererRD::TAA *taa = nullptr;
RendererRD::FSR2Effect *fsr2_effect = nullptr;
+ RendererRD::FSR3UpscalerEffect *fsr3_upscaler_effect = nullptr;
RendererRD::SSEffects *ss_effects = nullptr;
#ifdef METAL_MFXTEMPORAL_ENABLED
diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp
index 20443d66d292..7e8a86d5328f 100644
--- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp
+++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp
@@ -464,11 +464,11 @@ void RendererSceneRenderRD::_render_buffers_post_process_and_tonemap(const Rende
bool can_use_storage = _render_buffers_can_be_storage();
RS::ViewportScaling3DMode scale_mode = rb->get_scaling_3d_mode();
- bool use_upscaled_texture = rb->has_upscaled_texture() && (scale_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2 || scale_mode == RS::VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL);
+ bool use_upscaled_texture = rb->has_upscaled_texture() && (scale_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2 || scale_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR3 || scale_mode == RS::VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL);
SpatialUpscaler *spatial_upscaler = nullptr;
if (can_use_effects) {
- if (scale_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR) {
- spatial_upscaler = fsr;
+ if (scale_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR1) {
+ spatial_upscaler = fsr1;
} else if (scale_mode == RS::VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL) {
#if METAL_ENABLED
spatial_upscaler = mfx_spatial;
@@ -1696,7 +1696,7 @@ void RendererSceneRenderRD::init() {
vrs = memnew(RendererRD::VRS);
}
if (can_use_storage) {
- fsr = memnew(RendererRD::FSR);
+ fsr1 = memnew(RendererRD::FSR1Effect);
}
#ifdef METAL_ENABLED
mfx_spatial = memnew(RendererRD::MFXSpatialEffect);
@@ -1730,8 +1730,8 @@ RendererSceneRenderRD::~RendererSceneRenderRD() {
if (vrs) {
memdelete(vrs);
}
- if (fsr) {
- memdelete(fsr);
+ if (fsr1) {
+ memdelete(fsr1);
}
#ifdef METAL_ENABLED
if (mfx_spatial) {
diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h
index 831140da357d..40560ee8bcfb 100644
--- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h
+++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h
@@ -34,7 +34,7 @@
#include "servers/rendering/renderer_rd/effects/bokeh_dof.h"
#include "servers/rendering/renderer_rd/effects/copy_effects.h"
#include "servers/rendering/renderer_rd/effects/debug_effects.h"
-#include "servers/rendering/renderer_rd/effects/fsr.h"
+#include "servers/rendering/renderer_rd/effects/ffx/fsr1.h"
#include "servers/rendering/renderer_rd/effects/luminance.h"
#ifdef METAL_ENABLED
#include "servers/rendering/renderer_rd/effects/metal_fx.h"
@@ -65,9 +65,9 @@ class RendererSceneRenderRD : public RendererSceneRender, public RenderingShader
RendererRD::Luminance *luminance = nullptr;
RendererRD::SMAA *smaa = nullptr;
RendererRD::ToneMapper *tone_mapper = nullptr;
- RendererRD::FSR *fsr = nullptr;
RendererRD::VRS *vrs = nullptr;
RendererRD::Resolve *resolve_effects = nullptr;
+ RendererRD::FSR1Effect *fsr1 = nullptr;
#ifdef METAL_ENABLED
RendererRD::MFXSpatialEffect *mfx_spatial = nullptr;
#endif
diff --git a/servers/rendering/renderer_rd/shaders/effects/SCsub b/servers/rendering/renderer_rd/shaders/effects/SCsub
index e5517e52eba0..4dd8a4cdc206 100644
--- a/servers/rendering/renderer_rd/shaders/effects/SCsub
+++ b/servers/rendering/renderer_rd/shaders/effects/SCsub
@@ -17,4 +17,4 @@ if "RD_GLSL" in env["BUILDERS"]:
for glsl_file in glsl_files:
env.RD_GLSL(glsl_file)
-SConscript("fsr2/SCsub")
+SConscript("ffx/SCsub")
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/SCsub b/servers/rendering/renderer_rd/shaders/effects/ffx/SCsub
new file mode 100644
index 000000000000..4d6ae4ba7481
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/SCsub
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+from misc.utility.scons_hints import *
+
+Import("env")
+
+if "RD_GLSL" in env["BUILDERS"]:
+ # find all include files
+ gl_include_files = (
+ [str(f) for f in Glob("*_inc.glsl")]
+ + [str(f) for f in Glob("../*_inc.glsl")]
+ + [str(f) for f in Glob("*/*_inc.glsl")]
+ )
+
+ # Add all AMD FidelityFX (FSR1/FSR2/FSR3 upscaler) shader and header files.
+ ffx_dir = "#thirdparty/amd-ffx"
+ gl_include_files += [str(f) for f in Glob(ffx_dir + "/shaders/*/*.glsl")]
+ gl_include_files += [str(f) for f in Glob(ffx_dir + "/gpu/*.h")]
+ gl_include_files += [str(f) for f in Glob(ffx_dir + "/gpu/*/*.h")]
+
+ # find all shader code (all glsl files in the subdirectories, excluding our include files)
+ glsl_files = [str(f) for f in Glob("*/*.glsl") if str(f) not in gl_include_files]
+
+ # make sure we recompile shaders if include files change
+ env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files + ["#glsl_builders.py"])
+
+ # compile shaders
+ for glsl_file in glsl_files:
+ env.RD_GLSL(glsl_file)
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_easu_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_easu_pass.glsl
new file mode 100644
index 000000000000..5467a16c6ba7
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_easu_pass.glsl
@@ -0,0 +1,7 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "thirdparty/amd-ffx/shaders/fsr1/ffx_fsr1_easu_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_rcas_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_rcas_pass.glsl
new file mode 100644
index 000000000000..c3d0d33f983b
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_rcas_pass.glsl
@@ -0,0 +1,7 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "thirdparty/amd-ffx/shaders/fsr1/ffx_fsr1_rcas_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_accumulate_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_accumulate_pass.glsl
new file mode 100644
index 000000000000..8adc90f1ea73
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_accumulate_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_accumulate_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_autogen_reactive_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_autogen_reactive_pass.glsl
new file mode 100644
index 000000000000..bf6e1aedd251
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_autogen_reactive_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_autogen_reactive_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_compute_luminance_pyramid_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_compute_luminance_pyramid_pass.glsl
new file mode 100644
index 000000000000..2a21d360d422
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_compute_luminance_pyramid_pass.glsl
@@ -0,0 +1,7 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_depth_clip_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_depth_clip_pass.glsl
new file mode 100644
index 000000000000..511f1e26ff75
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_depth_clip_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_depth_clip_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_lock_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_lock_pass.glsl
new file mode 100644
index 000000000000..586362f6bedb
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_lock_pass.glsl
@@ -0,0 +1,7 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_lock_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_rcas_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_rcas_pass.glsl
new file mode 100644
index 000000000000..e0134b9ca71c
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_rcas_pass.glsl
@@ -0,0 +1,7 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_rcas_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_reconstruct_previous_depth_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_reconstruct_previous_depth_pass.glsl
new file mode 100644
index 000000000000..370e2ecc9359
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_reconstruct_previous_depth_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_reconstruct_previous_depth_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_tcr_autogen_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_tcr_autogen_pass.glsl
new file mode 100644
index 000000000000..09114ac46a84
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_tcr_autogen_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_tcr_autogen_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_accumulate_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_accumulate_pass.glsl
new file mode 100644
index 000000000000..4de99bf9c901
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_accumulate_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_autogen_reactive_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_autogen_reactive_pass.glsl
new file mode 100644
index 000000000000..5ef9e308ba57
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_autogen_reactive_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_autogen_reactive_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_debug_view_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_debug_view_pass.glsl
new file mode 100644
index 000000000000..acff62a7a969
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_debug_view_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_instability_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_instability_pass.glsl
new file mode 100644
index 000000000000..33451b183bfd
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_instability_pass.glsl
@@ -0,0 +1,7 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_pyramid_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_pyramid_pass.glsl
new file mode 100644
index 000000000000..44c5479209a8
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_pyramid_pass.glsl
@@ -0,0 +1,7 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_inputs_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_inputs_pass.glsl
new file mode 100644
index 000000000000..66260e393fc5
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_inputs_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_reactivity_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_reactivity_pass.glsl
new file mode 100644
index 000000000000..04279dfcb62e
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_reactivity_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_rcas_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_rcas_pass.glsl
new file mode 100644
index 000000000000..10778593af5d
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_rcas_pass.glsl
@@ -0,0 +1,7 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pass.glsl
new file mode 100644
index 000000000000..9321a763340a
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pyramid_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pyramid_pass.glsl
new file mode 100644
index 000000000000..0b4b31aab2fe
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pyramid_pass.glsl
@@ -0,0 +1,8 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#include "../../motion_vector_inc.glsl"
+#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub b/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub
deleted file mode 100644
index 53f3ee397752..000000000000
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env python
-from misc.utility.scons_hints import *
-
-Import("env")
-
-if "RD_GLSL" in env["BUILDERS"]:
- # find all include files
- gl_include_files = [str(f) for f in Glob("*_inc.glsl")] + [str(f) for f in Glob("../*_inc.glsl")]
-
- # Add all FSR2 shader and header files.
- fsr2_dir = "#thirdparty/amd-fsr2/shaders"
- gl_include_files += [str(f) for f in Glob(fsr2_dir + "/*.h")]
- gl_include_files += [str(f) for f in Glob(fsr2_dir + "/*.glsl")]
-
- # find all shader code(all glsl files excluding our include files)
- glsl_files = [str(f) for f in Glob("*.glsl") if str(f) not in gl_include_files]
-
- # make sure we recompile shaders if include files change
- env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files + ["#glsl_builders.py"])
-
- # compile shaders
- for glsl_file in glsl_files:
- env.RD_GLSL(glsl_file)
diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl
deleted file mode 100644
index 67fce9a34294..000000000000
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl
+++ /dev/null
@@ -1,8 +0,0 @@
-#[compute]
-
-#version 450
-
-#VERSION_DEFINES
-
-#include "../motion_vector_inc.glsl"
-#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl
deleted file mode 100644
index d362958aa693..000000000000
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl
+++ /dev/null
@@ -1,8 +0,0 @@
-#[compute]
-
-#version 450
-
-#VERSION_DEFINES
-
-#include "../motion_vector_inc.glsl"
-#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl
deleted file mode 100644
index 37504c2e530b..000000000000
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl
+++ /dev/null
@@ -1,7 +0,0 @@
-#[compute]
-
-#version 450
-
-#VERSION_DEFINES
-
-#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl
deleted file mode 100644
index 0ee08e4c76f1..000000000000
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl
+++ /dev/null
@@ -1,8 +0,0 @@
-#[compute]
-
-#version 450
-
-#VERSION_DEFINES
-
-#include "../motion_vector_inc.glsl"
-#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl
deleted file mode 100644
index 8c8430d4b1c3..000000000000
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl
+++ /dev/null
@@ -1,7 +0,0 @@
-#[compute]
-
-#version 450
-
-#VERSION_DEFINES
-
-#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl
deleted file mode 100644
index 4120cfe64495..000000000000
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl
+++ /dev/null
@@ -1,7 +0,0 @@
-#[compute]
-
-#version 450
-
-#VERSION_DEFINES
-
-#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl
deleted file mode 100644
index f31abec215d5..000000000000
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl
+++ /dev/null
@@ -1,8 +0,0 @@
-#[compute]
-
-#version 450
-
-#VERSION_DEFINES
-
-#include "../motion_vector_inc.glsl"
-#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl
deleted file mode 100644
index 818374e43c9a..000000000000
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl
+++ /dev/null
@@ -1,8 +0,0 @@
-#[compute]
-
-#version 450
-
-#VERSION_DEFINES
-
-#include "../motion_vector_inc.glsl"
-#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl"
diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr_upscale.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr_upscale.glsl
deleted file mode 100644
index 221e97becea7..000000000000
--- a/servers/rendering/renderer_rd/shaders/effects/fsr_upscale.glsl
+++ /dev/null
@@ -1,173 +0,0 @@
-/**************************************************************************/
-/* fsr_upscale.glsl */
-/**************************************************************************/
-/* This file is part of: */
-/* GODOT ENGINE */
-/* https://godotengine.org */
-/**************************************************************************/
-/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
-/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
-/* */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the */
-/* "Software"), to deal in the Software without restriction, including */
-/* without limitation the rights to use, copy, modify, merge, publish, */
-/* distribute, sublicense, and/or sell copies of the Software, and to */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions: */
-/* */
-/* The above copyright notice and this permission notice shall be */
-/* included in all copies or substantial portions of the Software. */
-/* */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-/**************************************************************************/
-
-#[compute]
-
-#version 450
-
-#VERSION_DEFINES
-
-#define A_GPU
-#define A_GLSL
-
-#ifdef MODE_FSR_UPSCALE_NORMAL
-
-#define A_HALF
-
-#endif
-
-#include "thirdparty/amd-fsr/ffx_a.h"
-
-layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
-
-layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2D fsr_image;
-layout(set = 0, binding = 0) uniform sampler2D source_image;
-
-#define FSR_UPSCALE_PASS_TYPE_EASU 0
-#define FSR_UPSCALE_PASS_TYPE_RCAS 1
-
-layout(push_constant, std430) uniform Params {
- float resolution_width;
- float resolution_height;
- float upscaled_width;
- float upscaled_height;
- float sharpness;
- int pass;
-}
-params;
-
-AU4 Const0, Const1, Const2, Const3;
-
-#ifdef MODE_FSR_UPSCALE_FALLBACK
-
-#define FSR_EASU_F
-AF4 FsrEasuRF(AF2 p) {
- AF4 res = textureGather(source_image, p, 0);
- return res;
-}
-AF4 FsrEasuGF(AF2 p) {
- AF4 res = textureGather(source_image, p, 1);
- return res;
-}
-AF4 FsrEasuBF(AF2 p) {
- AF4 res = textureGather(source_image, p, 2);
- return res;
-}
-
-#define FSR_RCAS_F
-AF4 FsrRcasLoadF(ASU2 p) {
- return AF4(texelFetch(source_image, ASU2(p), 0));
-}
-void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
-
-#else
-
-#define FSR_EASU_H
-AH4 FsrEasuRH(AF2 p) {
- AH4 res = AH4(textureGather(source_image, p, 0));
- return res;
-}
-AH4 FsrEasuGH(AF2 p) {
- AH4 res = AH4(textureGather(source_image, p, 1));
- return res;
-}
-AH4 FsrEasuBH(AF2 p) {
- AH4 res = AH4(textureGather(source_image, p, 2));
- return res;
-}
-
-#define FSR_RCAS_H
-AH4 FsrRcasLoadH(ASW2 p) {
- return AH4(texelFetch(source_image, ASU2(p), 0));
-}
-void FsrRcasInputH(inout AH1 r, inout AH1 g, inout AH1 b) {}
-
-#endif
-
-#include "thirdparty/amd-fsr/ffx_fsr1.h"
-
-void fsr_easu_pass(AU2 pos) {
-#ifdef MODE_FSR_UPSCALE_NORMAL
-
- AH3 Gamma2Color = AH3(0, 0, 0);
- FsrEasuH(Gamma2Color, pos, Const0, Const1, Const2, Const3);
- imageStore(fsr_image, ASU2(pos), AH4(Gamma2Color, 1));
-
-#else
-
- AF3 Gamma2Color = AF3(0, 0, 0);
- FsrEasuF(Gamma2Color, pos, Const0, Const1, Const2, Const3);
- imageStore(fsr_image, ASU2(pos), AF4(Gamma2Color, 1));
-
-#endif
-}
-
-void fsr_rcas_pass(AU2 pos) {
-#ifdef MODE_FSR_UPSCALE_NORMAL
-
- AH3 Gamma2Color = AH3(0, 0, 0);
- FsrRcasH(Gamma2Color.r, Gamma2Color.g, Gamma2Color.b, pos, Const0);
- imageStore(fsr_image, ASU2(pos), AH4(Gamma2Color, 1));
-
-#else
-
- AF3 Gamma2Color = AF3(0, 0, 0);
- FsrRcasF(Gamma2Color.r, Gamma2Color.g, Gamma2Color.b, pos, Const0);
- imageStore(fsr_image, ASU2(pos), AF4(Gamma2Color, 1));
-
-#endif
-}
-
-void fsr_pass(AU2 pos) {
- if (params.pass == FSR_UPSCALE_PASS_TYPE_EASU) {
- fsr_easu_pass(pos);
- } else if (params.pass == FSR_UPSCALE_PASS_TYPE_RCAS) {
- fsr_rcas_pass(pos);
- }
-}
-
-void main() {
- // Clang does not like unused functions. If ffx_a.h is included in the binary, clang will throw a fit and not compile so we must configure FSR in this shader
- if (params.pass == FSR_UPSCALE_PASS_TYPE_EASU) {
- FsrEasuCon(Const0, Const1, Const2, Const3, params.resolution_width, params.resolution_height, params.resolution_width, params.resolution_height, params.upscaled_width, params.upscaled_height);
- } else if (params.pass == FSR_UPSCALE_PASS_TYPE_RCAS) {
- FsrRcasCon(Const0, params.sharpness);
- }
-
- AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);
-
- fsr_pass(gxy);
- gxy.x += 8u;
- fsr_pass(gxy);
- gxy.y += 8u;
- fsr_pass(gxy);
- gxy.x -= 8u;
- fsr_pass(gxy);
-}
diff --git a/servers/rendering/renderer_rd/shaders/effects/motion_vector_inc.glsl b/servers/rendering/renderer_rd/shaders/effects/motion_vector_inc.glsl
index cbf202653e78..c3174fa27f4a 100644
--- a/servers/rendering/renderer_rd/shaders/effects/motion_vector_inc.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/motion_vector_inc.glsl
@@ -3,4 +3,4 @@ vec2 derive_motion_vector(vec2 uv, float depth, mat4 reprojection_matrix) {
return 0.5f + (previous_pos_ndc.xy / previous_pos_ndc.w) * 0.5f - uv;
}
-#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(i, j, k) derive_motion_vector(i, j, k)
+#define FFX_FSR_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(i, j, k) derive_motion_vector(i, j, k)
diff --git a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp
index 9169111872c4..b6401769e25b 100644
--- a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp
+++ b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp
@@ -71,6 +71,7 @@ void RenderSceneBuffersRD::_bind_methods() {
ClassDB::bind_method(D_METHOD("get_target_size"), &RenderSceneBuffersRD::get_target_size);
ClassDB::bind_method(D_METHOD("get_scaling_3d_mode"), &RenderSceneBuffersRD::get_scaling_3d_mode);
ClassDB::bind_method(D_METHOD("get_fsr_sharpness"), &RenderSceneBuffersRD::get_fsr_sharpness);
+ ClassDB::bind_method(D_METHOD("get_fsr_auto_generate_reactive"), &RenderSceneBuffersRD::get_fsr_auto_generate_reactive);
ClassDB::bind_method(D_METHOD("get_msaa_3d"), &RenderSceneBuffersRD::get_msaa_3d);
ClassDB::bind_method(D_METHOD("get_texture_samples"), &RenderSceneBuffersRD::get_texture_samples);
ClassDB::bind_method(D_METHOD("get_screen_space_aa"), &RenderSceneBuffersRD::get_screen_space_aa);
@@ -140,6 +141,11 @@ void RenderSceneBuffersRD::cleanup() {
}
}
+ if (fsr1_context) {
+ memdelete(fsr1_context);
+ fsr1_context = nullptr;
+ }
+
#ifdef METAL_ENABLED
if (mfx_spatial_context) {
memdelete(mfx_spatial_context);
@@ -163,6 +169,7 @@ void RenderSceneBuffersRD::configure(const RenderSceneBuffersConfiguration *p_co
screen_space_aa = p_config->get_screen_space_aa();
fsr_sharpness = p_config->get_fsr_sharpness();
+ fsr_auto_generate_reactive = p_config->get_fsr_auto_generate_reactive();
texture_mipmap_bias = p_config->get_texture_mipmap_bias();
anisotropic_filtering_level = p_config->get_anisotropic_filtering_level();
use_taa = p_config->get_use_taa();
@@ -179,7 +186,12 @@ void RenderSceneBuffersRD::configure(const RenderSceneBuffersConfiguration *p_co
// Create our color buffer.
const bool resolve_target = msaa_3d != RS::VIEWPORT_MSAA_DISABLED;
- create_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR, get_base_data_format(), get_color_usage_bits(resolve_target, false, can_be_storage));
+ uint32_t color_texture_usage_bits = get_color_usage_bits(resolve_target, false, can_be_storage);
+ if (fsr_auto_generate_reactive) {
+ // We need to be able to copy from the color texture when the opaque-only color has to be recorded.
+ color_texture_usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT;
+ }
+ create_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR, get_base_data_format(), color_texture_usage_bits);
// TODO: Detect when it is safe to use RD::TEXTURE_USAGE_TRANSIENT_BIT for RB_TEX_DEPTH, RB_TEX_COLOR_MSAA and/or RB_TEX_DEPTH_MSAA.
// (it means we cannot sample from it, we cannot copy from/to it) to save VRAM (and maybe performance too).
@@ -217,6 +229,7 @@ void RenderSceneBuffersRD::configure_for_reflections(const Size2i p_reflection_s
render_target = RID();
scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_OFF;
fsr_sharpness = 0.0;
+ fsr_auto_generate_reactive = false;
msaa_3d = RS::VIEWPORT_MSAA_DISABLED;
screen_space_aa = RS::VIEWPORT_SCREEN_SPACE_AA_DISABLED;
use_taa = false;
@@ -236,6 +249,10 @@ void RenderSceneBuffersRD::set_fsr_sharpness(float p_fsr_sharpness) {
fsr_sharpness = p_fsr_sharpness;
}
+void RenderSceneBuffersRD::set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) { // Setter for the FSR 2/3 automatic reactive mask flag.
+ fsr_auto_generate_reactive = p_fsr_auto_generate_reactive; // Only stores the flag; the copy-from usage bit of RB_TEX_COLOR is chosen in configure(), so no buffer is touched here.
+}
+
void RenderSceneBuffersRD::set_texture_mipmap_bias(float p_texture_mipmap_bias) {
texture_mipmap_bias = p_texture_mipmap_bias;
@@ -252,6 +269,22 @@ void RenderSceneBuffersRD::set_use_debanding(bool p_use_debanding) {
use_debanding = p_use_debanding;
}
+void RenderSceneBuffersRD::ensure_fsr1(RendererRD::FSR1Effect *p_effect) { // Creates the FSR1 context for these render buffers through p_effect, unless one already exists.
+ if (fsr1_context) {
+ return; // Already created; cleanup() deletes the context and resets the pointer.
+ }
+
+ RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton();
+ RenderingDevice *rd = RD::get_singleton();
+
+ // Determine the output format of the render target; it is passed to create_context() below.
+ RID dest = texture_storage->render_target_get_rd_texture(render_target);
+ RD::TextureFormat tf = rd->texture_get_format(dest);
+ RD::DataFormat output_format = tf.format;
+
+ fsr1_context = p_effect->create_context(internal_size, target_size, output_format); // NOTE(review): p_effect is dereferenced without a null check - confirm callers always pass a valid effect.
+}
+
#ifdef METAL_ENABLED
void RenderSceneBuffersRD::ensure_mfx(RendererRD::MFXSpatialEffect *p_effect) {
if (mfx_spatial_context) {
@@ -714,6 +747,13 @@ RID RenderSceneBuffersRD::get_velocity_depth_buffer() {
return velocity_depth;
}
+// Opaque-only color: creates the RB_TEX_COLOR_OPAQUE_ONLY buffer on first use and does nothing if it already exists.
+void RenderSceneBuffersRD::ensure_opaque_only_color_texture() {
+ if (!has_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_OPAQUE_ONLY)) {
+ create_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_OPAQUE_ONLY, get_base_data_format(), get_color_usage_bits(true, false, can_be_storage)); // Same base format as RB_TEX_COLOR; usage bits are requested with p_resolve = true and p_msaa = false.
+ }
+}
+
uint32_t RenderSceneBuffersRD::get_color_usage_bits(bool p_resolve, bool p_msaa, bool p_storage) {
DEV_ASSERT((!p_resolve && !p_msaa) || (p_resolve != p_msaa));
diff --git a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h
index e2c2175e1cb5..7c5603215861 100644
--- a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h
+++ b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h
@@ -33,6 +33,7 @@
#ifdef METAL_ENABLED
#include "../effects/metal_fx.h"
#endif
+#include "../effects/ffx/fsr1.h"
#include "../effects/vrs.h"
#include "core/templates/hash_map.h"
#include "material_storage.h"
@@ -52,6 +53,7 @@
#define RB_TEX_DEPTH_MSAA SNAME("depth_msaa")
#define RB_TEX_VELOCITY SNAME("velocity")
#define RB_TEX_VELOCITY_MSAA SNAME("velocity_msaa")
+#define RB_TEX_COLOR_OPAQUE_ONLY SNAME("color_opaque_only")
#define RB_TEX_BLUR_0 SNAME("blur_0")
#define RB_TEX_BLUR_1 SNAME("blur_1")
@@ -80,9 +82,11 @@ class RenderSceneBuffersRD : public RenderSceneBuffers {
Size2i internal_size = Size2i(0, 0);
RS::ViewportScaling3DMode scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_OFF;
float fsr_sharpness = 0.2f;
+ bool fsr_auto_generate_reactive = false;
float texture_mipmap_bias = 0.0f;
RS::ViewportAnisotropicFiltering anisotropic_filtering_level = RS::VIEWPORT_ANISOTROPY_4X;
+ RendererRD::FSR1Context *fsr1_context = nullptr;
#ifdef METAL_ENABLED
RendererRD::MFXSpatialContext *mfx_spatial_context = nullptr;
#endif
@@ -197,10 +201,14 @@ class RenderSceneBuffersRD : public RenderSceneBuffers {
virtual void configure(const RenderSceneBuffersConfiguration *p_config) override;
void configure_for_reflections(const Size2i p_reflection_size);
virtual void set_fsr_sharpness(float p_fsr_sharpness) override;
+ virtual void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) override;
virtual void set_texture_mipmap_bias(float p_texture_mipmap_bias) override;
virtual void set_anisotropic_filtering_level(RS::ViewportAnisotropicFiltering p_anisotropic_filtering_level) override;
virtual void set_use_debanding(bool p_use_debanding) override;
+ void ensure_fsr1(RendererRD::FSR1Effect *p_effect);
+ _FORCE_INLINE_ RendererRD::FSR1Context *get_fsr1_context() const { return fsr1_context; }
+
#ifdef METAL_ENABLED
void ensure_mfx(RendererRD::MFXSpatialEffect *p_effect);
_FORCE_INLINE_ RendererRD::MFXSpatialContext *get_mfx_spatial_context() const { return mfx_spatial_context; }
@@ -236,6 +244,7 @@ class RenderSceneBuffersRD : public RenderSceneBuffers {
_FORCE_INLINE_ Size2i get_target_size() const { return target_size; }
_FORCE_INLINE_ RS::ViewportScaling3DMode get_scaling_3d_mode() const { return scaling_3d_mode; }
_FORCE_INLINE_ float get_fsr_sharpness() const { return fsr_sharpness; }
+ _FORCE_INLINE_ bool get_fsr_auto_generate_reactive() const { return fsr_auto_generate_reactive; }
_FORCE_INLINE_ RS::ViewportMSAA get_msaa_3d() const { return msaa_3d; }
_FORCE_INLINE_ RD::TextureSamples get_texture_samples() const { return texture_samples; }
_FORCE_INLINE_ RS::ViewportScreenSpaceAA get_screen_space_aa() const { return screen_space_aa; }
@@ -307,7 +316,7 @@ class RenderSceneBuffersRD : public RenderSceneBuffers {
return get_texture_slice(RB_SCOPE_BUFFERS, RB_TEX_COLOR_UPSCALED, p_layer, 0);
}
- // Velocity, currently only used by TAA (Clustered) but we'll be using this in other places soon too.
+ // Velocity, used by TAA and FSR.
void ensure_velocity();
bool has_velocity_buffer(bool p_has_msaa);
@@ -316,6 +325,19 @@ class RenderSceneBuffersRD : public RenderSceneBuffers {
RID get_velocity_depth_buffer();
+ // Opaque-only color buffer, used for FSR 2/3.
+
+ void ensure_opaque_only_color_texture(); // Creates the buffer if it does not exist yet.
+ _FORCE_INLINE_ bool has_opaque_only_color_texture() const { // Whether RB_TEX_COLOR_OPAQUE_ONLY is present in the RB_SCOPE_BUFFERS scope.
+ return has_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_OPAQUE_ONLY);
+ }
+ _FORCE_INLINE_ RID get_opaque_only_color_texture() const { // Overload without a layer argument; looks the texture up by scope and name.
+ return get_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_OPAQUE_ONLY);
+ }
+ _FORCE_INLINE_ RID get_opaque_only_color_texture(const uint32_t p_layer) { // Per-layer overload; presumably the trailing 0 selects the first mipmap - confirm against get_texture_slice().
+ return get_texture_slice(RB_SCOPE_BUFFERS, RB_TEX_COLOR_OPAQUE_ONLY, p_layer, 0);
+ }
+
// Samplers adjusted with the mipmap bias that is best fit for the configuration of these render buffers.
_FORCE_INLINE_ RendererRD::MaterialStorage::Samplers get_samplers() const {
diff --git a/servers/rendering/renderer_viewport.cpp b/servers/rendering/renderer_viewport.cpp
index c622decaa657..65a4c89f4d2b 100644
--- a/servers/rendering/renderer_viewport.cpp
+++ b/servers/rendering/renderer_viewport.cpp
@@ -166,7 +166,7 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) {
}
if (scaling_3d_mode == RS::VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL && !RD::get_singleton()->has_feature(RD::SUPPORTS_METALFX_SPATIAL)) {
- scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_FSR;
+ scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_FSR1;
WARN_PRINT_ONCE("MetalFX spatial upscaling is not supported by the current renderer or hardware. Falling back to FSR scaling.");
}
@@ -207,7 +207,7 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) {
if (use_taa && (scaling_type == RS::VIEWPORT_SCALING_3D_TYPE_TEMPORAL)) {
// Temporal upscalers can't be used with TAA.
// Turn it off and prefer using the temporal upscaler.
- WARN_PRINT_ONCE("FSR 2 or MetalFX Temporal is not compatible with TAA. Disabling TAA internally.");
+ WARN_PRINT_ONCE("FSR 2/3 or MetalFX Temporal is not compatible with TAA. Disabling TAA internally.");
use_taa = false;
}
@@ -227,8 +227,9 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) {
break;
case RS::VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL:
case RS::VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL:
- case RS::VIEWPORT_SCALING_3D_MODE_FSR:
+ case RS::VIEWPORT_SCALING_3D_MODE_FSR1:
case RS::VIEWPORT_SCALING_3D_MODE_FSR2:
+ case RS::VIEWPORT_SCALING_3D_MODE_FSR3:
target_width = p_viewport->size.width;
target_height = p_viewport->size.height;
render_width = MAX(target_width * scaling_3d_scale, 1.0); // target_width / (target_width * scaling)
@@ -278,6 +279,7 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) {
rb_config.set_msaa_3d(msaa_3d);
rb_config.set_screen_space_aa(p_viewport->screen_space_aa);
rb_config.set_fsr_sharpness(p_viewport->fsr_sharpness);
+ rb_config.set_fsr_auto_generate_reactive(p_viewport->fsr_auto_generate_reactive);
rb_config.set_texture_mipmap_bias(texture_mipmap_bias);
rb_config.set_anisotropic_filtering_level(p_viewport->anisotropic_filtering_level);
rb_config.set_use_taa(use_taa);
@@ -989,8 +991,9 @@ void RendererViewport::viewport_set_scaling_3d_mode(RID p_viewport, RS::Viewport
ERR_FAIL_NULL(viewport);
const String rendering_method = OS::get_singleton()->get_current_rendering_method();
if (rendering_method != "forward_plus") {
- ERR_FAIL_COND_EDMSG(p_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR, "FSR1 is only available when using the Forward+ renderer.");
+ ERR_FAIL_COND_EDMSG(p_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR1, "FSR1 is only available when using the Forward+ renderer.");
ERR_FAIL_COND_EDMSG(p_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2, "FSR2 is only available when using the Forward+ renderer.");
+ ERR_FAIL_COND_EDMSG(p_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR3, "FSR3 is only available when using the Forward+ renderer.");
ERR_FAIL_COND_EDMSG(p_mode == RS::VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL, "MetalFX Temporal is only available when using the Forward+ renderer.");
}
if (rendering_method == "gl_compatibility") {
@@ -1020,6 +1023,14 @@ void RendererViewport::viewport_set_fsr_sharpness(RID p_viewport, float p_sharpn
_configure_3d_render_buffers(viewport);
}
+void RendererViewport::viewport_set_fsr_auto_generate_reactive(RID p_viewport, bool p_fsr_auto_generate_reactive) { // Stores the FSR 2/3 automatic reactive mask flag on the viewport and reconfigures its 3D buffers.
+ Viewport *viewport = viewport_owner.get_or_null(p_viewport);
+ ERR_FAIL_NULL(viewport); // Bail out if the RID does not resolve to a viewport.
+
+ viewport->fsr_auto_generate_reactive = p_fsr_auto_generate_reactive;
+ _configure_3d_render_buffers(viewport); // Reconfigure so the new value is forwarded through rb_config to the render buffers.
+}
+
void RendererViewport::viewport_set_texture_mipmap_bias(RID p_viewport, float p_mipmap_bias) {
Viewport *viewport = viewport_owner.get_or_null(p_viewport);
ERR_FAIL_NULL(viewport);
diff --git a/servers/rendering/renderer_viewport.h b/servers/rendering/renderer_viewport.h
index fa506be98cd8..087691844f3b 100644
--- a/servers/rendering/renderer_viewport.h
+++ b/servers/rendering/renderer_viewport.h
@@ -57,6 +57,7 @@ class RendererViewport {
RS::ViewportScaling3DMode scaling_3d_mode = RenderingServer::VIEWPORT_SCALING_3D_MODE_BILINEAR;
float scaling_3d_scale = 1.0;
float fsr_sharpness = 0.2f;
+ bool fsr_auto_generate_reactive = false;
float texture_mipmap_bias = 0.0f;
RS::ViewportAnisotropicFiltering anisotropic_filtering_level = RenderingServer::VIEWPORT_ANISOTROPY_4X;
bool fsr_enabled = false;
@@ -231,6 +232,7 @@ class RendererViewport {
void viewport_set_scaling_3d_mode(RID p_viewport, RS::ViewportScaling3DMode p_mode);
void viewport_set_scaling_3d_scale(RID p_viewport, float p_scaling_3d_scale);
void viewport_set_fsr_sharpness(RID p_viewport, float p_sharpness);
+ void viewport_set_fsr_auto_generate_reactive(RID p_viewport, bool p_fsr_auto_generate_reactive); // Sets the viewport's FSR 2/3 automatic reactive mask flag and reconfigures its 3D render buffers.
void viewport_set_texture_mipmap_bias(RID p_viewport, float p_mipmap_bias);
void viewport_set_anisotropic_filtering_level(RID p_viewport, RS::ViewportAnisotropicFiltering p_anisotropic_filtering_level);
diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp
index 0ed6c0ff6280..7f55ceeb5146 100644
--- a/servers/rendering/rendering_device.cpp
+++ b/servers/rendering/rendering_device.cpp
@@ -40,6 +40,7 @@
#include "core/profiling/profiling.h"
#include "core/templates/fixed_vector.h"
#include "modules/modules_enabled.gen.h"
+#include "renderer_rd/effects/ffx/ffx_common.h"
#include "servers/rendering/rendering_shader_container.h"
#ifdef MODULE_GLSLANG_ENABLED
diff --git a/servers/rendering/rendering_server.cpp b/servers/rendering/rendering_server.cpp
index 62f13bfc975c..aa49b19de8e3 100644
--- a/servers/rendering/rendering_server.cpp
+++ b/servers/rendering/rendering_server.cpp
@@ -2894,6 +2894,7 @@ void RenderingServer::_bind_methods() {
ClassDB::bind_method(D_METHOD("viewport_set_scaling_3d_mode", "viewport", "scaling_3d_mode"), &RenderingServer::viewport_set_scaling_3d_mode);
ClassDB::bind_method(D_METHOD("viewport_set_scaling_3d_scale", "viewport", "scale"), &RenderingServer::viewport_set_scaling_3d_scale);
ClassDB::bind_method(D_METHOD("viewport_set_fsr_sharpness", "viewport", "sharpness"), &RenderingServer::viewport_set_fsr_sharpness);
+ ClassDB::bind_method(D_METHOD("viewport_set_fsr_auto_generate_reactive", "viewport", "fsr_auto_generate_reactive"), &RenderingServer::viewport_set_fsr_auto_generate_reactive);
ClassDB::bind_method(D_METHOD("viewport_set_texture_mipmap_bias", "viewport", "mipmap_bias"), &RenderingServer::viewport_set_texture_mipmap_bias);
ClassDB::bind_method(D_METHOD("viewport_set_anisotropic_filtering_level", "viewport", "anisotropic_filtering_level"), &RenderingServer::viewport_set_anisotropic_filtering_level);
ClassDB::bind_method(D_METHOD("viewport_set_update_mode", "viewport", "update_mode"), &RenderingServer::viewport_set_update_mode);
@@ -2947,8 +2948,9 @@ void RenderingServer::_bind_methods() {
ClassDB::bind_method(D_METHOD("viewport_set_vrs_texture", "viewport", "texture"), &RenderingServer::viewport_set_vrs_texture);
BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_BILINEAR);
- BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_FSR);
+ BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_FSR1);
BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_FSR2);
+ BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_FSR3);
BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL);
BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL);
BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_MAX);
@@ -3784,7 +3786,7 @@ void RenderingServer::init() {
String mode_hints;
String mode_hints_metal;
{
- Vector mode_hints_arr = { "Bilinear (Fastest)", "FSR 1.0 (Fast)", "FSR 2.2 (Slow)" };
+ Vector mode_hints_arr = { "Bilinear (Fastest)", "FSR 1.2 (Fast)", "FSR 2.3 (Slow)", "FSR 3.1 (Slow)" };
mode_hints = String(",").join(mode_hints_arr);
mode_hints_arr.push_back("MetalFX (Spatial)");
@@ -3798,6 +3800,7 @@ void RenderingServer::init() {
}
GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/scaling_3d/scale", PROPERTY_HINT_RANGE, "0.25,2.0,0.01"), 1.0);
GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/scaling_3d/fsr_sharpness", PROPERTY_HINT_RANGE, "0,2,0.1"), 0.2f);
+ GLOBAL_DEF(PropertyInfo(Variant::BOOL, "rendering/scaling_3d/fsr_auto_generate_reactive"), false);
GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/textures/default_filters/texture_mipmap_bias", PROPERTY_HINT_RANGE, "-2,2,0.001"), 0.0f);
GLOBAL_DEF(PropertyInfo(Variant::INT, "rendering/textures/decals/filter", PROPERTY_HINT_ENUM, "Nearest (Fast),Linear (Fast),Nearest Mipmap (Fast),Linear Mipmap (Fast),Nearest Mipmap Anisotropic (Average),Linear Mipmap Anisotropic (Average)"), DECAL_FILTER_LINEAR_MIPMAPS);
diff --git a/servers/rendering/rendering_server.h b/servers/rendering/rendering_server.h
index 23f4b9d0cef6..4949478954a2 100644
--- a/servers/rendering/rendering_server.h
+++ b/servers/rendering/rendering_server.h
@@ -948,8 +948,9 @@ class RenderingServer : public Object {
enum ViewportScaling3DMode {
VIEWPORT_SCALING_3D_MODE_BILINEAR,
- VIEWPORT_SCALING_3D_MODE_FSR,
+ VIEWPORT_SCALING_3D_MODE_FSR1,
VIEWPORT_SCALING_3D_MODE_FSR2,
+ VIEWPORT_SCALING_3D_MODE_FSR3, // NOTE(review): inserted before the MetalFX modes, so their implicit values shift by one - confirm compatibility for saved settings and bindings.
VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL,
VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL,
VIEWPORT_SCALING_3D_MODE_MAX,
@@ -973,9 +974,9 @@ class RenderingServer : public Object {
};
_ALWAYS_INLINE_ static ViewportScaling3DType scaling_3d_mode_type(ViewportScaling3DMode p_mode) {
- if (p_mode == VIEWPORT_SCALING_3D_MODE_BILINEAR || p_mode == VIEWPORT_SCALING_3D_MODE_FSR || p_mode == VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL) {
+ if (p_mode == VIEWPORT_SCALING_3D_MODE_BILINEAR || p_mode == VIEWPORT_SCALING_3D_MODE_FSR1 || p_mode == VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL) {
return VIEWPORT_SCALING_3D_TYPE_SPATIAL;
- } else if (p_mode == VIEWPORT_SCALING_3D_MODE_FSR2 || p_mode == VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL) {
+ } else if (p_mode == VIEWPORT_SCALING_3D_MODE_FSR2 || p_mode == VIEWPORT_SCALING_3D_MODE_FSR3 || p_mode == VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL) {
return VIEWPORT_SCALING_3D_TYPE_TEMPORAL;
}
return VIEWPORT_SCALING_3D_TYPE_NONE;
@@ -996,6 +997,7 @@ class RenderingServer : public Object {
virtual void viewport_set_scaling_3d_mode(RID p_viewport, ViewportScaling3DMode p_scaling_3d_mode) = 0;
virtual void viewport_set_scaling_3d_scale(RID p_viewport, float p_scaling_3d_scale) = 0;
virtual void viewport_set_fsr_sharpness(RID p_viewport, float p_fsr_sharpness) = 0;
+ virtual void viewport_set_fsr_auto_generate_reactive(RID p_viewport, bool p_fsr_auto_generate_reactive) = 0;
virtual void viewport_set_texture_mipmap_bias(RID p_viewport, float p_texture_mipmap_bias) = 0;
virtual void viewport_set_anisotropic_filtering_level(RID p_viewport, ViewportAnisotropicFiltering p_anisotropic_filtering_level) = 0;
diff --git a/servers/rendering/rendering_server_default.h b/servers/rendering/rendering_server_default.h
index 34faef662c41..9c6d562b5486 100644
--- a/servers/rendering/rendering_server_default.h
+++ b/servers/rendering/rendering_server_default.h
@@ -711,6 +711,7 @@ class RenderingServerDefault : public RenderingServer {
FUNC2(viewport_set_scaling_3d_mode, RID, ViewportScaling3DMode)
FUNC2(viewport_set_scaling_3d_scale, RID, float)
FUNC2(viewport_set_fsr_sharpness, RID, float)
+ FUNC2(viewport_set_fsr_auto_generate_reactive, RID, bool)
FUNC2(viewport_set_texture_mipmap_bias, RID, float)
FUNC2(viewport_set_anisotropic_filtering_level, RID, ViewportAnisotropicFiltering)
diff --git a/servers/rendering/storage/render_scene_buffers.cpp b/servers/rendering/storage/render_scene_buffers.cpp
index 67135a4c4b5f..ccbf665bae14 100644
--- a/servers/rendering/storage/render_scene_buffers.cpp
+++ b/servers/rendering/storage/render_scene_buffers.cpp
@@ -49,7 +49,7 @@ void RenderSceneBuffersConfiguration::_bind_methods() {
ClassDB::bind_method(D_METHOD("get_scaling_3d_mode"), &RenderSceneBuffersConfiguration::get_scaling_3d_mode);
ClassDB::bind_method(D_METHOD("set_scaling_3d_mode", "scaling_3d_mode"), &RenderSceneBuffersConfiguration::set_scaling_3d_mode);
- ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.0 (Fast),FSR 2.2 (Slow),MetalFX (Spatial),MetalFX (Temporal)"), "set_scaling_3d_mode", "get_scaling_3d_mode"); // TODO VIEWPORT_SCALING_3D_MODE_OFF is possible here too, but we can't specify an enum string for it.
+ ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.2 (Fast),FSR 2.3 (Slow),FSR 3.1 (Slow),MetalFX (Spatial),MetalFX (Temporal)"), "set_scaling_3d_mode", "get_scaling_3d_mode"); // TODO VIEWPORT_SCALING_3D_MODE_OFF is possible here too, but we can't specify an enum string for it.
ClassDB::bind_method(D_METHOD("get_msaa_3d"), &RenderSceneBuffersConfiguration::get_msaa_3d);
ClassDB::bind_method(D_METHOD("set_msaa_3d", "msaa_3d"), &RenderSceneBuffersConfiguration::set_msaa_3d);
@@ -63,6 +63,10 @@ void RenderSceneBuffersConfiguration::_bind_methods() {
ClassDB::bind_method(D_METHOD("set_fsr_sharpness", "fsr_sharpness"), &RenderSceneBuffersConfiguration::set_fsr_sharpness);
ADD_PROPERTY(PropertyInfo(Variant::BOOL, "fsr_sharpness"), "set_fsr_sharpness", "get_fsr_sharpness");
+ ClassDB::bind_method(D_METHOD("get_fsr_auto_generate_reactive"), &RenderSceneBuffersConfiguration::get_fsr_auto_generate_reactive);
+ ClassDB::bind_method(D_METHOD("set_fsr_auto_generate_reactive", "fsr_auto_generate_reactive"), &RenderSceneBuffersConfiguration::set_fsr_auto_generate_reactive);
+ ADD_PROPERTY(PropertyInfo(Variant::BOOL, "fsr_auto_generate_reactive"), "set_fsr_auto_generate_reactive", "get_fsr_auto_generate_reactive");
+
ClassDB::bind_method(D_METHOD("get_texture_mipmap_bias"), &RenderSceneBuffersConfiguration::get_texture_mipmap_bias);
ClassDB::bind_method(D_METHOD("set_texture_mipmap_bias", "texture_mipmap_bias"), &RenderSceneBuffersConfiguration::set_texture_mipmap_bias);
ADD_PROPERTY(PropertyInfo(Variant::BOOL, "texture_mipmap_bias"), "set_texture_mipmap_bias", "get_texture_mipmap_bias");
@@ -79,6 +83,7 @@ void RenderSceneBuffers::_bind_methods() {
void RenderSceneBuffersExtension::_bind_methods() {
GDVIRTUAL_BIND(_configure, "config");
GDVIRTUAL_BIND(_set_fsr_sharpness, "fsr_sharpness");
+ GDVIRTUAL_BIND(_set_fsr_auto_generate_reactive, "fsr_auto_generate_reactive");
GDVIRTUAL_BIND(_set_texture_mipmap_bias, "texture_mipmap_bias");
GDVIRTUAL_BIND(_set_anisotropic_filtering_level, "anisotropic_filtering_level");
GDVIRTUAL_BIND(_set_use_debanding, "use_debanding");
@@ -92,6 +97,10 @@ void RenderSceneBuffersExtension::set_fsr_sharpness(float p_fsr_sharpness) {
GDVIRTUAL_CALL(_set_fsr_sharpness, p_fsr_sharpness);
}
+void RenderSceneBuffersExtension::set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) {
+ GDVIRTUAL_CALL(_set_fsr_auto_generate_reactive, p_fsr_auto_generate_reactive); // Forwards the value to the _set_fsr_auto_generate_reactive virtual bound in _bind_methods().
+}
+
void RenderSceneBuffersExtension::set_texture_mipmap_bias(float p_texture_mipmap_bias) {
GDVIRTUAL_CALL(_set_texture_mipmap_bias, p_texture_mipmap_bias);
}
diff --git a/servers/rendering/storage/render_scene_buffers.h b/servers/rendering/storage/render_scene_buffers.h
index fead81c6256f..fcc8b37690ba 100644
--- a/servers/rendering/storage/render_scene_buffers.h
+++ b/servers/rendering/storage/render_scene_buffers.h
@@ -49,6 +49,7 @@ class RenderSceneBuffersConfiguration : public RefCounted {
RS::ViewportAnisotropicFiltering anisotropic_filtering_level = RS::VIEWPORT_ANISOTROPY_4X;
float fsr_sharpness = 0.0;
+ bool fsr_auto_generate_reactive = false;
float texture_mipmap_bias = 0.0;
bool use_taa = false;
bool use_debanding = false;
@@ -81,6 +82,9 @@ class RenderSceneBuffersConfiguration : public RefCounted {
float get_fsr_sharpness() const { return fsr_sharpness; }
void set_fsr_sharpness(float p_fsr_sharpness) { fsr_sharpness = p_fsr_sharpness; }
+ bool get_fsr_auto_generate_reactive() const { return fsr_auto_generate_reactive; }
+ void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) { fsr_auto_generate_reactive = p_fsr_auto_generate_reactive; }
+
float get_texture_mipmap_bias() const { return texture_mipmap_bias; }
void set_texture_mipmap_bias(float p_texture_mipmap_bias) { texture_mipmap_bias = p_texture_mipmap_bias; }
@@ -111,6 +115,7 @@ class RenderSceneBuffers : public RefCounted {
// for those settings that are unlikely to require buffers to be recreated, we'll add setters
virtual void set_fsr_sharpness(float p_fsr_sharpness) = 0;
+ virtual void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) = 0;
virtual void set_texture_mipmap_bias(float p_texture_mipmap_bias) = 0;
virtual void set_anisotropic_filtering_level(RS::ViewportAnisotropicFiltering p_anisotropic_filtering_level) = 0;
virtual void set_use_debanding(bool p_use_debanding) = 0;
@@ -124,6 +129,7 @@ class RenderSceneBuffersExtension : public RenderSceneBuffers {
GDVIRTUAL1(_configure, const RenderSceneBuffersConfiguration *)
GDVIRTUAL1(_set_fsr_sharpness, float)
+ GDVIRTUAL1(_set_fsr_auto_generate_reactive, bool)
GDVIRTUAL1(_set_texture_mipmap_bias, float)
GDVIRTUAL1(_set_anisotropic_filtering_level, int)
GDVIRTUAL1(_set_use_debanding, bool)
@@ -134,6 +140,7 @@ class RenderSceneBuffersExtension : public RenderSceneBuffers {
virtual void configure(const RenderSceneBuffersConfiguration *p_config) override;
virtual void set_fsr_sharpness(float p_fsr_sharpness) override;
+ virtual void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) override;
virtual void set_texture_mipmap_bias(float p_texture_mipmap_bias) override;
virtual void set_anisotropic_filtering_level(RS::ViewportAnisotropicFiltering p_anisotropic_filtering_level) override;
virtual void set_use_debanding(bool p_use_debanding) override;
diff --git a/thirdparty/README.md b/thirdparty/README.md
index dc7524be8d37..b1af40953328 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -17,34 +17,20 @@ Files extracted from upstream source:
- `LICENSE-MIT`
-## amd-fsr
+## amd-ffx
-- Upstream: https://github.com/GPUOpen-Effects/FidelityFX-FSR
-- Version: 1.0.2 (a21ffb8f6c13233ba336352bdff293894c706575, 2021)
+- Upstream: https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK
+- Version: 1.1.4 (c6efa6bf7f2027b3ec94f28578bb5965eabb9e55, 2025)
- License: MIT
Files extracted from upstream source:
-- `ffx_a.h` and `ffx_fsr1.h` from `ffx-fsr`
-- `license.txt`
-
-
-## amd-fsr2
-
-- Upstream: https://github.com/GPUOpen-Effects/FidelityFX-FSR2
-- Version: 2.2.1 (1680d1edd5c034f88ebbbb793d8b88f8842cf804, 2023)
-- License: MIT
-
-Files extracted from upstream source:
-
-- `ffx_*.cpp` and `ffx_*.h` from `src/ffx-fsr2-api`
-- `shaders` folder from `src/ffx-fsr2-api` with `ffx_*.hlsl` files excluded
-- `LICENSE.txt`
-
-Patches:
-
-- `0001-build-fixes.patch` (GH-81197)
-- `0002-godot-fsr2-options.patch` (GH-81197)
+- `ffx_*.h` from `sdk/include/FidelityFX/host`
+- `ffx_message.cpp`, `ffx_assert.cpp`, `ffx_object_management.cpp` and `ffx_object_management.h` from `sdk/src/shared`
+- `ffx_*.cpp` and `ffx_*_private.h` from `sdk/src/components/*`
+- `gpu` folder from `sdk/include/FidelityFX/gpu`
+- `shaders` folder from `sdk/src/backends/vk/shaders`
+- `license.md`
## angle
diff --git a/thirdparty/amd-fsr2/ffx_assert.cpp b/thirdparty/amd-ffx/ffx_assert.cpp
similarity index 83%
rename from thirdparty/amd-fsr2/ffx_assert.cpp
rename to thirdparty/amd-ffx/ffx_assert.cpp
index 8a70ad501a12..4ba09b26f343 100644
--- a/thirdparty/amd-fsr2/ffx_assert.cpp
+++ b/thirdparty/amd-ffx/ffx_assert.cpp
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -49,8 +50,8 @@ bool ffxAssertReport(const char* file, int32_t line, const char* condition, cons
#ifdef _WIN32
// form the final assertion string and output to the TTY.
- const size_t bufferSize = static_cast(snprintf(nullptr, 0, "%s(%d): ASSERTION FAILED. %s\n", file, line, message ? message : condition)) + 1;
- char* tempBuf = static_cast(malloc(bufferSize));
+ const size_t bufferSize = snprintf(NULL, 0, "%s(%d): ASSERTION FAILED. %s\n", file, line, message ? message : condition) + 1;
+ char* tempBuf = (char*)malloc(bufferSize);
if (!tempBuf) {
return true;
diff --git a/thirdparty/amd-fsr2/ffx_assert.h b/thirdparty/amd-ffx/ffx_assert.h
similarity index 90%
rename from thirdparty/amd-fsr2/ffx_assert.h
rename to thirdparty/amd-ffx/ffx_assert.h
index ae32d2a73345..b0df5af2813d 100644
--- a/thirdparty/amd-fsr2/ffx_assert.h
+++ b/thirdparty/amd-ffx/ffx_assert.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -28,6 +29,11 @@
extern "C" {
#endif // #ifdef __cplusplus
+/// @defgroup Asserts Asserts
+/// Asserts used by FidelityFX SDK functions
+///
+/// @ingroup ffxHost
+
#ifdef _DEBUG
#ifdef _WIN32
@@ -37,6 +43,8 @@ extern "C" {
}
#else
/// Macro to force the debugger to break at this point in the code.
+///
+/// @ingroup Asserts
#define FFX_DEBUG_BREAK __debugbreak();
#endif
#else
@@ -58,6 +66,7 @@ extern "C" {
///
/// @param [in] message The message generated by the assert.
///
+/// @ingroup Asserts
typedef void (*FfxAssertCallback)(const char* message);
/// Function to report an assert.
@@ -70,16 +79,20 @@ typedef void (*FfxAssertCallback)(const char* message);
/// @returns
/// Always returns true.
///
+/// @ingroup Asserts
FFX_API bool ffxAssertReport(const char* file, int32_t line, const char* condition, const char* msg);
/// Provides the ability to set a callback for assert messages.
///
/// @param [in] callback The callback function that will receive assert messages.
///
+/// @ingroup Asserts
FFX_API void ffxAssertSetPrintingCallback(FfxAssertCallback callback);
#ifdef _DEBUG
/// Standard assert macro.
+///
+/// @ingroup Asserts
#define FFX_ASSERT(condition) \
do \
{ \
@@ -88,6 +101,8 @@ FFX_API void ffxAssertSetPrintingCallback(FfxAssertCallback callback);
} while (0)
/// Assert macro with message.
+///
+/// @ingroup Asserts
#define FFX_ASSERT_MESSAGE(condition, msg) \
do \
{ \
@@ -96,6 +111,8 @@ FFX_API void ffxAssertSetPrintingCallback(FfxAssertCallback callback);
} while (0)
/// Assert macro that always fails.
+///
+/// @ingroup Asserts
#define FFX_ASSERT_FAIL(message) \
do \
{ \
@@ -125,6 +142,8 @@ FFX_API void ffxAssertSetPrintingCallback(FfxAssertCallback callback);
#endif // #if _DEBUG
/// Simple static assert.
+///
+/// @ingroup Asserts
#define FFX_STATIC_ASSERT(condition) static_assert(condition, #condition)
#ifdef __cplusplus
diff --git a/thirdparty/amd-ffx/ffx_error.h b/thirdparty/amd-ffx/ffx_error.h
new file mode 100644
index 000000000000..40599719b7a0
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_error.h
@@ -0,0 +1,79 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "ffx_types.h"
+
+/// @defgroup Errors Error Codes
+/// Error codes returned from FidelityFX SDK functions
+///
+/// @ingroup ffxHost
+
+/// Typedef for error codes returned from functions in the FidelityFX SDK.
+///
+/// @ingroup Errors
+typedef int32_t FfxErrorCode;
+
+/// Error codes and their meaning
+///
+/// FFX_OK is 0; every other code listed here has the top bit (0x80000000) set.
+/// Note that FFX_EOF and FFX_ERROR_EOF are two distinct values.
+///
+/// @ingroup Errors
+typedef enum FfxErrorCodes {
+
+ FFX_OK = 0, ///< The operation completed successfully.
+ FFX_ERROR_INVALID_POINTER = 0x80000000, ///< The operation failed due to an invalid pointer.
+ FFX_ERROR_INVALID_ALIGNMENT = 0x80000001, ///< The operation failed due to an invalid alignment.
+ FFX_ERROR_INVALID_SIZE = 0x80000002, ///< The operation failed due to an invalid size.
+ FFX_EOF = 0x80000003, ///< The end of the file was encountered.
+ FFX_ERROR_INVALID_PATH = 0x80000004, ///< The operation failed because the specified path was invalid.
+ FFX_ERROR_EOF = 0x80000005, ///< The operation failed because end of file was reached.
+ FFX_ERROR_MALFORMED_DATA = 0x80000006, ///< The operation failed because of some malformed data.
+ FFX_ERROR_OUT_OF_MEMORY = 0x80000007, ///< The operation failed because it ran out of memory.
+ FFX_ERROR_INCOMPLETE_INTERFACE = 0x80000008, ///< The operation failed because the interface was not fully configured.
+ FFX_ERROR_INVALID_ENUM = 0x80000009, ///< The operation failed because of an invalid enumeration value.
+ FFX_ERROR_INVALID_ARGUMENT = 0x8000000a, ///< The operation failed because an argument was invalid.
+ FFX_ERROR_OUT_OF_RANGE = 0x8000000b, ///< The operation failed because a value was out of range.
+ FFX_ERROR_NULL_DEVICE = 0x8000000c, ///< The operation failed because a device was null.
+ FFX_ERROR_BACKEND_API_ERROR = 0x8000000d, ///< The operation failed because the backend API returned an error code.
+ FFX_ERROR_INSUFFICIENT_MEMORY = 0x8000000e, ///< The operation failed because there was not enough memory.
+ FFX_ERROR_INVALID_VERSION = 0x8000000f, ///< The operation failed because the wrong backend was linked.
+ FFX_ERROR_ACCESS_DENIED = 0x80000010, ///< The operation failed because access to the resource was denied.
+
+}FfxErrorCodes;
+
+/// Helper macro to return error code y from a function when a specific condition, x, is not met.
+///
+/// The macro expands to a bare `if (...) { return ...; }` statement (there is no
+/// do/while wrapper), so put braces around any enclosing if/else branch that uses it;
+/// otherwise a following `else` attaches to the macro's own `if`.
+///
+/// @ingroup Errors
+#define FFX_RETURN_ON_ERROR(x, y) \
+ if (!(x)) \
+ { \
+ return (y); \
+ }
+
+/// Helper macro to return error code x from a function when it is not FFX_OK.
+///
+/// x is evaluated exactly once and stored in a block-scoped local named `ret`;
+/// because of that local, the expression passed as x must not itself refer to a
+/// caller variable called `ret`.
+///
+/// @ingroup Errors
+#define FFX_VALIDATE(x) \
+ { \
+ FfxErrorCode ret = x; \
+ FFX_RETURN_ON_ERROR(ret == FFX_OK, ret); \
+ }
diff --git a/thirdparty/amd-ffx/ffx_frameinterpolation.cpp b/thirdparty/amd-ffx/ffx_frameinterpolation.cpp
new file mode 100644
index 000000000000..ec13f7aed7d2
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_frameinterpolation.cpp
@@ -0,0 +1,1265 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include // for max used inside SPD CPU code.
+#include // for fabs, abs, sinf, sqrt, etc.
+#include // for memset
+#include // for FLT_EPSILON
+#include "ffx_frameinterpolation.h"
+
+#define FFX_CPU
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-function"
+#pragma clang diagnostic ignored "-Wsign-compare"
+#endif
+
+#include "gpu/ffx_core.h"
+#include "gpu/spd/ffx_spd.h"
+#include "ffx_object_management.h"
+
+#include "ffx_frameinterpolation_private.h"
+
+// lists to map shader resource bindpoint name to resource identifier
+// One table entry: patchResourceBindings() compares `name` against a pipeline
+// binding's name and, on a match, copies `index` into that binding's resourceIdentifier.
+typedef struct ResourceBinding
+{
+ uint32_t index; // resource (or constant buffer) identifier assigned to the binding
+ wchar_t name[64]; // bind point name, compared with wcscmp
+}ResourceBinding;
+
+// Bind point names of read-only (r_*) resources and the identifiers they map to.
+// patchResourceBindings() searches this table for both SRV textures and SRV buffers.
+static const ResourceBinding srvResourceBindingTable[] =
+{
+ // Frame Interpolation textures
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEPTH, L"r_input_depth"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_MOTION_VECTORS, L"r_input_motion_vectors"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISTORTION_FIELD, L"r_input_distortion_field"},
+
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"r_dilated_depth"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"r_dilated_motion_vectors"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME, L"r_reconstructed_depth_previous_frame"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME, L"r_reconstructed_depth_interpolated_frame"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PREVIOUS_INTERPOLATION_SOURCE, L"r_previous_interpolation_source"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE, L"r_current_interpolation_source"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK, L"r_disocclusion_mask"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X, L"r_game_motion_vector_field_x"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y, L"r_game_motion_vector_field_y"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X, L"r_optical_flow_motion_vector_field_x"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y, L"r_optical_flow_motion_vector_field_y"},
+
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR, L"r_optical_flow"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_CONFIDENCE, L"r_optical_flow_confidence"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_GLOBAL_MOTION, L"r_optical_flow_global_motion"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCENE_CHANGE_DETECTION, L"r_optical_flow_scd"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT, L"r_output"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_MASK, L"r_inpainting_mask"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID, L"r_inpainting_pyramid"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PRESENT_BACKBUFFER, L"r_present_backbuffer"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS, L"r_counters"},
+};
+
+// Bind point names of writable (rw_*) resources and the identifiers they map to.
+// patchResourceBindings() searches this table for both UAV textures and UAV buffers.
+// Each inpainting pyramid mip level (0..12) has its own entry.
+static const ResourceBinding uavResourceBindingTable[] =
+{
+ // Frame Interpolation textures
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"rw_dilated_depth"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"rw_dilated_motion_vectors"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME, L"rw_reconstructed_depth_previous_frame"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME, L"rw_reconstructed_depth_interpolated_frame"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT, L"rw_output"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK, L"rw_disocclusion_mask"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X, L"rw_game_motion_vector_field_x"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y, L"rw_game_motion_vector_field_y"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X, L"rw_optical_flow_motion_vector_field_x"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y, L"rw_optical_flow_motion_vector_field_y"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_MASK, L"rw_inpainting_mask"},
+
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS, L"rw_counters"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_0, L"rw_inpainting_pyramid0"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_1, L"rw_inpainting_pyramid1"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_2, L"rw_inpainting_pyramid2"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_3, L"rw_inpainting_pyramid3"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_4, L"rw_inpainting_pyramid4"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_5, L"rw_inpainting_pyramid5"}, // extra declaration, as this is globallycoherent
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_6, L"rw_inpainting_pyramid6"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_7, L"rw_inpainting_pyramid7"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_8, L"rw_inpainting_pyramid8"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_9, L"rw_inpainting_pyramid9"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_10, L"rw_inpainting_pyramid10"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_11, L"rw_inpainting_pyramid11"},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_12, L"rw_inpainting_pyramid12"},
+};
+
+// Constant buffer bind point names and the identifiers they map to;
+// searched by patchResourceBindings() for every constant buffer binding of a pipeline.
+static const ResourceBinding cbResourceBindingTable[] =
+{
+ {FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_IDENTIFIER, L"cbFI"},
+ {FFX_FRAMEINTERPOLATION_INPAINTING_PYRAMID_CONSTANTBUFFER_IDENTIFIER, L"cbInpaintingPyramid"},
+};
+
+// Broad structure of the root signature.
+// Enumerators take the implicit values 0..3 in declaration order.
+// NOTE(review): no use of this enum is visible in this part of the file.
+typedef enum FrameInterpolationRootSignatureLayout {
+
+ FRAMEINTERPOLATION_ROOT_SIGNATURE_LAYOUT_UAVS,
+ FRAMEINTERPOLATION_ROOT_SIGNATURE_LAYOUT_SRVS,
+ FRAMEINTERPOLATION_ROOT_SIGNATURE_LAYOUT_CONSTANTS,
+ FRAMEINTERPOLATION_ROOT_SIGNATURE_LAYOUT_CONSTANTS_REGISTER_1,
+ FRAMEINTERPOLATION_ROOT_SIGNATURE_LAYOUT_PARAMETER_COUNT // equals the number of entries above (4)
+} FrameInterpolationRootSignatureLayout;
+
+// Union with a single member that wraps the inpainting pyramid constants.
+// NOTE(review): presumably sized storage for a secondary constant buffer — confirm at the use site.
+typedef union FrameInterpolationSecondaryUnion
+{
+ InpaintingPyramidConstants inpaintingPyramidConstants;
+} FrameInterpolationSecondaryUnion;
+
+// Lanczos-2 kernel weight: sinc(value) * sinc(value / 2), with sinc(x) = sin(pi * x) / (pi * x).
+// Returns 1 when |value| is below FFX_EPSILON, so the division by (nearly) zero is never evaluated.
+static float lanczos2(float value)
+{
+    // fabsf keeps the comparison in floating point. An unqualified abs() can bind to the
+    // integer overload on some toolchains, which truncates every |value| < 1 to 0 and
+    // makes the function return 1 for the whole first lobe.
+    return fabsf(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value));
+}
+
+// Calculate halton number for index and base (the radical inverse of index written in that base).
+// An index <= 0 yields 0. base is expected to be >= 2: with base 1 the loop never
+// terminates and base 0 divides by zero.
+static float halton(int32_t index, int32_t base)
+{
+    float f = 1.0f, result = 0.0f;
+
+    for (int32_t currentIndex = index; currentIndex > 0;) {
+
+        f /= (float)base;
+        result = result + f * (float)(currentIndex % base);
+        // Exact integer division. Going through float (floorf of a float quotient) rounds
+        // indices above 2^24 and then disagrees with the exact digit taken by % above.
+        currentIndex /= base;
+    }
+
+    return result;
+}
+
+// Returns the entry of `table` (holding `tableCount` entries) whose bind point name
+// equals `name`, or nullptr when no entry matches.
+static const ResourceBinding* findResourceBinding(const ResourceBinding* table, size_t tableCount, const wchar_t* name)
+{
+    for (size_t entry = 0; entry < tableCount; ++entry)
+    {
+        if (wcscmp(table[entry].name, name) == 0)
+            return &table[entry];
+    }
+    return nullptr;
+}
+
+// Resolves the resourceIdentifier of every binding in the pipeline by looking its name up
+// in the matching bind point table. Bindings are processed in the order: SRV textures,
+// UAV textures, constant buffers, UAV buffers, SRV buffers.
+//
+// Returns FFX_ERROR_INVALID_ARGUMENT at the first binding whose name is not in its table
+// (bindings handled before that point keep their patched identifiers), FFX_OK otherwise.
+static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
+{
+    const ResourceBinding* match = nullptr;
+
+    // SRV textures
+    for (uint32_t i = 0; i < inoutPipeline->srvTextureCount; ++i)
+    {
+        match = findResourceBinding(srvResourceBindingTable, _countof(srvResourceBindingTable), inoutPipeline->srvTextureBindings[i].name);
+        if (match == nullptr)
+            return FFX_ERROR_INVALID_ARGUMENT;
+        inoutPipeline->srvTextureBindings[i].resourceIdentifier = match->index;
+    }
+
+    // UAV textures (each inpainting pyramid mip has its own table entry)
+    for (uint32_t i = 0; i < inoutPipeline->uavTextureCount; ++i)
+    {
+        match = findResourceBinding(uavResourceBindingTable, _countof(uavResourceBindingTable), inoutPipeline->uavTextureBindings[i].name);
+        if (match == nullptr)
+            return FFX_ERROR_INVALID_ARGUMENT;
+        inoutPipeline->uavTextureBindings[i].resourceIdentifier = match->index;
+    }
+
+    // constant buffers
+    for (uint32_t i = 0; i < inoutPipeline->constCount; ++i)
+    {
+        match = findResourceBinding(cbResourceBindingTable, _countof(cbResourceBindingTable), inoutPipeline->constantBufferBindings[i].name);
+        if (match == nullptr)
+            return FFX_ERROR_INVALID_ARGUMENT;
+        inoutPipeline->constantBufferBindings[i].resourceIdentifier = match->index;
+    }
+
+    // UAV buffers share the UAV table
+    for (uint32_t i = 0; i < inoutPipeline->uavBufferCount; ++i)
+    {
+        match = findResourceBinding(uavResourceBindingTable, _countof(uavResourceBindingTable), inoutPipeline->uavBufferBindings[i].name);
+        if (match == nullptr)
+            return FFX_ERROR_INVALID_ARGUMENT;
+        inoutPipeline->uavBufferBindings[i].resourceIdentifier = match->index;
+    }
+
+    // SRV buffers share the SRV table
+    for (uint32_t i = 0; i < inoutPipeline->srvBufferCount; ++i)
+    {
+        match = findResourceBinding(srvResourceBindingTable, _countof(srvResourceBindingTable), inoutPipeline->srvBufferBindings[i].name);
+        if (match == nullptr)
+            return FFX_ERROR_INVALID_ARGUMENT;
+        inoutPipeline->srvBufferBindings[i].resourceIdentifier = match->index;
+    }
+
+    return FFX_OK;
+}
+
+// Builds the shader permutation bit mask used when creating a frame interpolation pipeline.
+// The FfxPass argument and the trailing bool are accepted but do not influence the result.
+static uint32_t getPipelinePermutationFlags(uint32_t contextFlags, FfxPass, bool fp16, bool force64, bool)
+{
+    uint32_t permutation = 0;
+
+    // Low resolution motion vectors are selected unless display resolution ones were requested.
+    if (!(contextFlags & FFX_FRAMEINTERPOLATION_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS))
+        permutation |= FRAMEINTERPOLATION_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS;
+
+    if (contextFlags & FFX_FRAMEINTERPOLATION_ENABLE_JITTER_MOTION_VECTORS)
+        permutation |= FRAMEINTERPOLATION_SHADER_PERMUTATION_JITTER_MOTION_VECTORS;
+
+    if (contextFlags & FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED)
+        permutation |= FRAMEINTERPOLATION_SHADER_PERMUTATION_DEPTH_INVERTED;
+
+    if (force64)
+        permutation |= FRAMEINTERPOLATION_SHADER_PERMUTATION_FORCE_WAVE64;
+
+    if (fp16)
+        permutation |= FRAMEINTERPOLATION_SHADER_PERMUTATION_ALLOW_FP16;
+
+    return permutation;
+}
+
+// Creates (or re-creates) all compute pipelines of the frame interpolation context.
+//
+// Fills a shared FfxPipelineDescription (two samplers, two root constant buffers),
+// queries the device capabilities to choose the shader permutation flags, then asks the
+// backend to create one pipeline per pass and patches its resource bindings.
+//
+// As written this function always returns FFX_OK (see the NOTE(review) comments below).
+static FfxErrorCode createPipelineStates(FfxFrameInterpolationContext_Private* context)
+{
+ FFX_ASSERT(context);
+
+ FfxPipelineDescription pipelineDescription = {};
+ pipelineDescription.contextFlags = context->contextDescription.flags;
+ pipelineDescription.stage = FFX_BIND_COMPUTE_SHADER_STAGE;
+
+ // Samplers
+ // samplerDescs[0] is a point sampler, samplerDescs[1] a linear sampler; both clamp on all three axes.
+ pipelineDescription.samplerCount = 2;
+ FfxSamplerDescription samplerDescs[2] = {
+ {FFX_FILTER_TYPE_MINMAGMIP_POINT, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE},
+ {FFX_FILTER_TYPE_MINMAGMIP_LINEAR, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE}};
+ pipelineDescription.samplers = samplerDescs;
+
+ // Root constants
+ // Sizes are expressed as a count of 32-bit values, not in bytes.
+ pipelineDescription.rootConstantBufferCount = 2;
+ FfxRootConstantDescription rootConstantDescs[2] =
+ {
+ {sizeof(FrameInterpolationConstants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE},
+ {sizeof(InpaintingPyramidConstants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE}
+ };
+ pipelineDescription.rootConstants = rootConstantDescs;
+
+ // Query device capabilities
+ // NOTE(review): `capabilities` has no initializer and the return code of
+ // fpGetDeviceCapabilities is not checked before the fields are read below.
+ FfxDeviceCapabilities capabilities;
+ context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &capabilities);
+
+ // Setup a few options used to determine permutation flags
+ bool haveShaderModel66 = capabilities.maximumSupportedShaderModel >= FFX_SHADER_MODEL_6_6;
+ bool supportedFP16 = capabilities.fp16Supported;
+ bool canForceWave64 = false;
+ bool useLut = false;
+
+ // Wave64 is only forced when the device reports a 32..64 lane range and shader model 6.6.
+ const uint32_t waveLaneCountMin = capabilities.waveLaneCountMin;
+ const uint32_t waveLaneCountMax = capabilities.waveLaneCountMax;
+ if (waveLaneCountMin == 32 && waveLaneCountMax == 64)
+ {
+ useLut = true;
+ canForceWave64 = haveShaderModel66;
+ }
+ else
+ canForceWave64 = false;
+
+ // Work out what permutation to load.
+ uint32_t contextFlags = context->contextDescription.flags;
+
+ // Set up pipeline descriptor (basically RootSignature and binding)
+ // The lambda releases any pipeline already stored in `pipeline`, copies `name` into the
+ // shared description, creates the pipeline through the backend and patches its bindings.
+ // NOTE(review): the FfxErrorCode returned by patchResourceBindings() is discarded.
+ auto CreateComputePipeline = [&](FfxPass pass, const wchar_t* name, FfxPipelineState* pipeline) -> FfxErrorCode {
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, pipeline, context->effectContextId);
+ wcscpy_s(pipelineDescription.name, name);
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(
+ &context->contextDescription.backendInterface,
+ FFX_EFFECT_FRAMEINTERPOLATION,
+ pass,
+ getPipelinePermutationFlags(contextFlags, pass, supportedFP16, canForceWave64, useLut),
+ &pipelineDescription,
+ context->effectContextId,
+ pipeline));
+ patchResourceBindings(pipeline);
+
+ return FFX_OK;
+ };
+
+ // NOTE(review): this lambda is never invoked within this function. If it were, it would
+ // switch the shared pipelineDescription.stage to vertex|pixel for all later calls too.
+ auto CreateRasterPipeline = [&](FfxPass pass, const wchar_t* name, FfxPipelineState* pipeline) -> FfxErrorCode {
+ wcscpy_s(pipelineDescription.name, name);
+ pipelineDescription.stage = (FfxBindStage)(FFX_BIND_VERTEX_SHADER_STAGE | FFX_BIND_PIXEL_SHADER_STAGE);
+ pipelineDescription.backbufferFormat = context->contextDescription.backBufferFormat;
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(
+ &context->contextDescription.backendInterface,
+ FFX_EFFECT_FRAMEINTERPOLATION,
+ pass,
+ getPipelinePermutationFlags(contextFlags, pass, supportedFP16, canForceWave64, useLut),
+ &pipelineDescription,
+ context->effectContextId,
+ pipeline));
+
+ return FFX_OK;
+ };
+
+ // Frame Interpolation Pipelines
+ // NOTE(review): the error code returned by each call below is discarded, so a pipeline
+ // that fails to create does not make this function fail.
+ CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_RECONSTRUCT_AND_DILATE, L"RECONSTRUCT_AND_DILATE", &context->pipelineFiReconstructAndDilate);
+ CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_SETUP, L"SETUP", &context->pipelineFiSetup);
+ CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_RECONSTRUCT_PREV_DEPTH, L"RECONSTRUCT_PREV_DEPTH", &context->pipelineFiReconstructPreviousDepth);
+ CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_GAME_MOTION_VECTOR_FIELD, L"GAME_MOTION_VECTOR_FIELD", &context->pipelineFiGameMotionVectorField);
+ CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_OPTICAL_FLOW_VECTOR_FIELD, L"OPTICAL_FLOW_VECTOR_FIELD", &context->pipelineFiOpticalFlowVectorField);
+ CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_DISOCCLUSION_MASK, L"DISOCCLUSION_MASK", &context->pipelineFiDisocclusionMask);
+ CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_INTERPOLATION, L"INTERPOLATION", &context->pipelineFiScfi);
+ CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_INPAINTING_PYRAMID, L"INPAINTING_PYRAMID", &context->pipelineInpaintingPyramid);
+ CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_INPAINTING, L"INPAINTING", &context->pipelineInpainting);
+ CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_GAME_VECTOR_FIELD_INPAINTING_PYRAMID, L"GAME_VECTOR_FIELD_INPAINTING_PYRAMID", & context->pipelineGameVectorFieldInpaintingPyramid);
+ CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_DEBUG_VIEW, L"DEBUG_VIEW", &context->pipelineDebugView);
+
+ return FFX_OK;
+}
+
+
+// Classifies a surface format into a precision group used for the HUDless check:
+// two formats are treated as compatible when they fall into the same group.
+// Only non-integer formats with at least the three RGB channels get a group (0..7);
+// every other format, including unknown values, yields -1.
+int GetFormatPrecisionGroup(FfxSurfaceFormat format)
+{
+    int precisionGroup = -1;
+
+    switch (format)
+    {
+    // 32 bits per channel
+    case FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS:
+    case FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT:
+    case FFX_SURFACE_FORMAT_R32G32B32_FLOAT:
+        precisionGroup = 0;
+        break;
+
+    // 16 bits per channel
+    case FFX_SURFACE_FORMAT_R16G16B16A16_TYPELESS:
+    case FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT:
+        precisionGroup = 1;
+        break;
+
+    // 8 bits per channel, unsigned normalized (either channel order)
+    case FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS:
+    case FFX_SURFACE_FORMAT_R8G8B8A8_UNORM:
+    case FFX_SURFACE_FORMAT_B8G8R8A8_TYPELESS:
+    case FFX_SURFACE_FORMAT_B8G8R8A8_UNORM:
+        precisionGroup = 2;
+        break;
+
+    // 8 bits per channel, signed normalized
+    case FFX_SURFACE_FORMAT_R8G8B8A8_SNORM:
+        precisionGroup = 3;
+        break;
+
+    // 8 bits per channel, sRGB
+    case FFX_SURFACE_FORMAT_R8G8B8A8_SRGB:
+    case FFX_SURFACE_FORMAT_B8G8R8A8_SRGB:
+        precisionGroup = 4;
+        break;
+
+    case FFX_SURFACE_FORMAT_R11G11B10_FLOAT:
+        precisionGroup = 5;
+        break;
+
+    case FFX_SURFACE_FORMAT_R10G10B10A2_TYPELESS:
+    case FFX_SURFACE_FORMAT_R10G10B10A2_UNORM:
+        precisionGroup = 6;
+        break;
+
+    case FFX_SURFACE_FORMAT_R9G9B9E5_SHAREDEXP:
+        precisionGroup = 7;
+        break;
+
+    // rejected formats: integer formats and formats with fewer than three channels
+    case FFX_SURFACE_FORMAT_R32G32B32A32_UINT:
+    case FFX_SURFACE_FORMAT_R32G32_FLOAT:
+    case FFX_SURFACE_FORMAT_R8_UINT:
+    case FFX_SURFACE_FORMAT_R32_UINT:
+    case FFX_SURFACE_FORMAT_R16G16_UINT:
+    case FFX_SURFACE_FORMAT_R16G16_SINT:
+    case FFX_SURFACE_FORMAT_R16G16_FLOAT:
+    case FFX_SURFACE_FORMAT_R16_FLOAT:
+    case FFX_SURFACE_FORMAT_R16_UINT:
+    case FFX_SURFACE_FORMAT_R16_UNORM:
+    case FFX_SURFACE_FORMAT_R16_SNORM:
+    case FFX_SURFACE_FORMAT_R8_UNORM:
+    case FFX_SURFACE_FORMAT_R8G8_UNORM:
+    case FFX_SURFACE_FORMAT_R8G8_UINT:
+    case FFX_SURFACE_FORMAT_R32_FLOAT:
+    case FFX_SURFACE_FORMAT_UNKNOWN:
+    default:
+        break;
+    }
+
+    return precisionGroup;
+}
+
+static FfxErrorCode frameinterpolationCreate(FfxFrameInterpolationContext_Private* context, const FfxFrameInterpolationContextDescription* contextDescription)
+{
+ FFX_ASSERT(context);
+ FFX_ASSERT(contextDescription);
+
+ // validate compatibility between backbuffer and hudless formats
+ int backBufferGroup = GetFormatPrecisionGroup(contextDescription->backBufferFormat);
+ int previousInterpolationSourceGroup = GetFormatPrecisionGroup(contextDescription->previousInterpolationSourceFormat);
+ FFX_RETURN_ON_ERROR(backBufferGroup >= 0 && previousInterpolationSourceGroup >= 0 && backBufferGroup == previousInterpolationSourceGroup, FFX_ERROR_INVALID_ARGUMENT);
+
+ // Setup the data for implementation.
+ memset(context, 0, sizeof(FfxFrameInterpolationContext_Private));
+ context->device = contextDescription->backendInterface.device;
+
+ memcpy(&context->contextDescription, contextDescription, sizeof(FfxFrameInterpolationContextDescription));
+
+ // Check version info - make sure we are linked with the right backend version
+ FfxVersionNumber version = context->contextDescription.backendInterface.fpGetSDKVersion(&context->contextDescription.backendInterface);
+ FFX_RETURN_ON_ERROR(version == FFX_SDK_MAKE_VERSION(1, 1, 4), FFX_ERROR_INVALID_VERSION);
+
+ // Create the context.
+ FfxErrorCode errorCode = context->contextDescription.backendInterface.fpCreateBackendContext(&context->contextDescription.backendInterface, FFX_EFFECT_FRAMEINTERPOLATION, nullptr, &context->effectContextId);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+ // call out for device caps.
+ errorCode = context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &context->deviceCapabilities);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+ // set defaults
+ context->firstExecution = true;
+
+ context->asyncSupported = (contextDescription->flags & FFX_FRAMEINTERPOLATION_ENABLE_ASYNC_SUPPORT) == FFX_FRAMEINTERPOLATION_ENABLE_ASYNC_SUPPORT;
+ context->constants.maxRenderSize[0] = contextDescription->maxRenderSize.width;
+ context->constants.maxRenderSize[1] = contextDescription->maxRenderSize.height;
+ context->constants.displaySize[0] = contextDescription->displaySize.width;
+ context->constants.displaySize[1] = contextDescription->displaySize.height;
+ context->constants.displaySizeRcp[0] = 1.0f / contextDescription->displaySize.width;
+ context->constants.displaySizeRcp[1] = 1.0f / contextDescription->displaySize.height;
+ context->constants.interpolationRectBase[0] = 0;
+ context->constants.interpolationRectBase[1] = 0;
+ context->constants.interpolationRectSize[0] = contextDescription->displaySize.width;
+ context->constants.interpolationRectSize[1] = contextDescription->displaySize.height;
+
+ // generate the data for the LUT.
+ const uint32_t lanczos2LutWidth = 128;
+ int16_t lanczos2Weights[lanczos2LutWidth] = { };
+
+ for (uint32_t currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; currentLanczosWidthIndex++) {
+
+ const float x = 2.0f * currentLanczosWidthIndex / float(lanczos2LutWidth - 1);
+ const float y = lanczos2(x);
+ lanczos2Weights[currentLanczosWidthIndex] = int16_t(roundf(y * 32767.0f));
+ }
+
+ uint8_t defaultDistortionFieldData[2] = { 0, 0 };
+ uint32_t atomicInitData[2] = { 0, 0 };
+ float defaultExposure[] = { 0.0f, 0.0f };
+ const FfxResourceType texture1dResourceType = (context->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_TEXTURE1D_USAGE) ? FFX_RESOURCE_TYPE_TEXTURE1D : FFX_RESOURCE_TYPE_TEXTURE2D;
+
+ // declare internal resources needed
+ const FfxInternalResourceDescription internalSurfaceDesc[] = {
+
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME, L"FI_ReconstructedDepthInterpolatedFrame", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X, L"FI_GameMotionVectorFieldX", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y, L"FI_GameMotionVectorFieldY", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID, L"FI_InpaintingPyramid", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width / 2, contextDescription->displaySize.height / 2, 0, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS, L"FI_Counters", FFX_RESOURCE_TYPE_BUFFER, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_UNKNOWN, 8, 4, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, // structured buffer contraining 2 UINT values
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X, L"FI_OpticalFlowMotionVectorFieldX", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y, L"FI_OpticalFlowMotionVectorFieldY", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PREVIOUS_INTERPOLATION_SOURCE, L"FI_PreviousInterpolationSouce", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ contextDescription->previousInterpolationSourceFormat, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_MASK, L"FI_InpaintingMask", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK, L"FI_DisocclusionMask", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8G8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEFAULT_DISTORTION_FIELD, L"FI_DefaultDistortionField", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_READ_ONLY,
+ FFX_SURFACE_FORMAT_R8G8_UNORM, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, FfxResourceInitData::FfxResourceInitBuffer(sizeof(defaultDistortionFieldData), defaultDistortionFieldData) },
+
+ };
+
+ // clear the SRV resources to NULL.
+ memset(context->srvResources, 0, sizeof(context->srvResources));
+
+ for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) {
+
+ const FfxInternalResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex];
+ const FfxResourceDescription resourceDescription = {currentSurfaceDescription->type,
+ currentSurfaceDescription->format,
+ currentSurfaceDescription->width,
+ currentSurfaceDescription->height,
+ 1,
+ currentSurfaceDescription->mipCount,
+ currentSurfaceDescription->flags,
+ currentSurfaceDescription->usage};
+ FfxResourceStates initialState = FFX_RESOURCE_STATE_UNORDERED_ACCESS;
+ if (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_READ_ONLY) initialState = FFX_RESOURCE_STATE_COMPUTE_READ;
+ if (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_RENDERTARGET) initialState = FFX_RESOURCE_STATE_RENDER_TARGET;
+
+ const FfxCreateResourceDescription createResourceDescription = { FFX_HEAP_TYPE_DEFAULT, resourceDescription, initialState, currentSurfaceDescription->name, currentSurfaceDescription->id, currentSurfaceDescription->initData };
+
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreateResource(&context->contextDescription.backendInterface, &createResourceDescription, context->effectContextId, &context->srvResources[currentSurfaceDescription->id]));
+ }
+
+ // copy resources to uavResources list
+ memcpy(context->uavResources, context->srvResources, sizeof(context->srvResources));
+
+ // avoid compiling pipelines on first render
+ {
+ context->refreshPipelineStates = false;
+ errorCode = createPipelineStates(context);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+ }
+
+ return FFX_OK;
+}
+
+static FfxErrorCode frameinterpolationRelease(FfxFrameInterpolationContext_Private* context)
+{
+ FFX_ASSERT(context);
+
+ // Tears the context down in order: pipelines, resources, then the backend context. Always returns FFX_OK.
+ auto* const backend = &context->contextDescription.backendInterface;
+ ffxSafeReleasePipeline(backend, &context->pipelineFiReconstructAndDilate, context->effectContextId);
+ ffxSafeReleasePipeline(backend, &context->pipelineFiSetup, context->effectContextId);
+ ffxSafeReleasePipeline(backend, &context->pipelineFiReconstructPreviousDepth, context->effectContextId);
+ ffxSafeReleasePipeline(backend, &context->pipelineFiGameMotionVectorField, context->effectContextId);
+ ffxSafeReleasePipeline(backend, &context->pipelineFiOpticalFlowVectorField, context->effectContextId);
+ ffxSafeReleasePipeline(backend, &context->pipelineFiDisocclusionMask, context->effectContextId);
+ ffxSafeReleasePipeline(backend, &context->pipelineFiScfi, context->effectContextId);
+ ffxSafeReleasePipeline(backend, &context->pipelineInpaintingPyramid, context->effectContextId);
+ ffxSafeReleasePipeline(backend, &context->pipelineInpainting, context->effectContextId);
+ ffxSafeReleasePipeline(backend, &context->pipelineGameVectorFieldInpaintingPyramid, context->effectContextId);
+ ffxSafeReleasePipeline(backend, &context->pipelineDebugView, context->effectContextId);
+
+ // Slots holding resources that were not created internally are reset to the NULL identifier first (SRV slots, then UAV slots).
+ context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_CONFIDENCE] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_GLOBAL_MOTION] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCENE_CHANGE_DETECTION] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISTORTION_FIELD] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ context->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+
+ // The two resources that were created with init data also own a copy resource; release those copies.
+ ffxSafeReleaseCopyResource(backend, context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS], context->effectContextId);
+ ffxSafeReleaseCopyResource(backend, context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEFAULT_DISTORTION_FIELD], context->effectContextId);
+
+ // Walk every SRV slot and release whatever internal resource it still refers to.
+ for (int32_t resourceIndex = 0; resourceIndex < FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT; ++resourceIndex) {
+ ffxSafeReleaseResource(backend, context->srvResources[resourceIndex], context->effectContextId);
+ }
+
+ // Finally destroy the backend context that belongs to this effect.
+ backend->fpDestroyBackendContext(backend, context->effectContextId);
+ return FFX_OK;
+}
+
+static void scheduleDispatch(FfxFrameInterpolationContext_Private* context, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
+{ // Builds a compute job for `pipeline` from the context's resources and constant buffers and schedules it with dimensions dispatchX x dispatchY x 1.
+ FfxComputeJobDescription jobDescriptor = {};
+ // SRV textures: each binding's resource identifier is looked up in context->srvResources.
+ for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex)
+ {
+ const uint32_t currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier;
+ const FfxResourceInternal currentResource = context->srvResources[currentResourceId];
+ jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource;
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name); // binding names are only copied in FFX_DEBUG builds
+#endif
+ }
+ // UAV textures: looked up in context->uavResources, with special handling for the inpainting pyramid mips.
+ for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavTextureCount; ++currentUnorderedAccessViewIndex) {
+
+ const uint32_t currentResourceId = pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].resourceIdentifier;
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].name, pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].name);
+#endif
+ // Identifiers INPAINTING_PYRAMID_MIPMAP_0..12 all map to the single INPAINTING_PYRAMID resource; the offset from MIPMAP_0 selects the mip.
+ if (currentResourceId >= FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_0 && currentResourceId <= FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_12)
+ {
+ const FfxResourceInternal currentResource = context->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID];
+ jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource;
+ jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = currentResourceId - FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_0;
+ }
+ else
+ {
+ const FfxResourceInternal currentResource = context->uavResources[currentResourceId]; // any other identifier indexes uavResources directly
+ jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource;
+ jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = 0; // always bound at mip 0
+ }
+ }
+ // Dispatch dimensions: Z is fixed to 1; the pipeline state is copied by value into the job.
+ jobDescriptor.dimensions[0] = dispatchX;
+ jobDescriptor.dimensions[1] = dispatchY;
+ jobDescriptor.dimensions[2] = 1;
+ jobDescriptor.pipeline = *pipeline;
+ // Constant buffers: each binding's resource identifier indexes context->constantBuffers.
+ for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) {
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name);
+#endif
+ jobDescriptor.cbs[currentRootConstantIndex] = context->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier];
+ }
+ // UAV buffers come from context->uavResources.
+ for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavBufferCount; ++currentUnorderedAccessViewIndex)
+ {
+ const uint32_t currentResourceId = pipeline->uavBufferBindings[currentUnorderedAccessViewIndex].resourceIdentifier;
+ jobDescriptor.uavBuffers[currentUnorderedAccessViewIndex].resource = context->uavResources[currentResourceId];
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.uavBuffers[currentUnorderedAccessViewIndex].name, pipeline->uavBufferBindings[currentUnorderedAccessViewIndex].name);
+#endif
+ }
+ // SRV buffers come from context->srvResources.
+ for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvBufferCount; ++currentShaderResourceViewIndex)
+ {
+ const uint32_t currentResourceId = pipeline->srvBufferBindings[currentShaderResourceViewIndex].resourceIdentifier;
+ jobDescriptor.srvBuffers[currentShaderResourceViewIndex].resource = context->srvResources[currentResourceId];
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.srvBuffers[currentShaderResourceViewIndex].name, pipeline->srvBufferBindings[currentShaderResourceViewIndex].name);
+#endif
+ }
+ // Wrap the compute description in a GPU job labelled with the pipeline name and hand it to the backend's scheduler.
+ FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE };
+ wcscpy_s(dispatchJob.jobLabel, pipeline->name);
+ dispatchJob.computeJobDescriptor = jobDescriptor;
+
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &dispatchJob);
+}
+
+FFX_API FfxErrorCode ffxFrameInterpolationGetSharedResourceDescriptions(FfxFrameInterpolationContext* context, FfxFrameInterpolationSharedResourceDescriptions* SharedResources)
+{
+ // Fills SharedResources with creation descriptions for the dilated depth, dilated motion vector and reconstructed previous depth textures; returns FFX_ERROR_INVALID_POINTER if either argument is null, else FFX_OK.
+ FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+ FFX_RETURN_ON_ERROR(SharedResources, FFX_ERROR_INVALID_POINTER);
+
+ FfxFrameInterpolationContext_Private* const privateContext = (FfxFrameInterpolationContext_Private*)(context);
+ const auto maxWidth = privateContext->contextDescription.maxRenderSize.width;
+ const auto maxHeight = privateContext->contextDescription.maxRenderSize.height;
+ // All three are 2D textures of the maximum render size, described in the unordered-access state with uninitialized data.
+ SharedResources->dilatedDepth = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R32_FLOAT, maxWidth, maxHeight, 1, 1, FFX_RESOURCE_FLAGS_NONE, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET) },
+ FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FISHARED_DilatedDepth", FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} };
+ SharedResources->dilatedMotionVectors = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R16G16_FLOAT, maxWidth, maxHeight, 1, 1, FFX_RESOURCE_FLAGS_NONE, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET) },
+ FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FISHARED_DilatedVelocity", FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} };
+ SharedResources->reconstructedPrevNearestDepth = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R32_UINT, maxWidth, maxHeight, 1, 1, FFX_RESOURCE_FLAGS_NONE, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV) },
+ FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FISHARED_ReconstructedPrevNearestDepth", FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} };
+
+ return FFX_OK;
+}
+
+FfxErrorCode ffxFrameInterpolationContextCreate(FfxFrameInterpolationContext* context, FfxFrameInterpolationContextDescription* contextDescription)
+{
+ // Public creation entry point: checks the arguments and the backend interface, then
+ // passes the caller's storage, viewed as the private context, to frameinterpolationCreate().
+ // Returns FFX_ERROR_INVALID_POINTER / FFX_ERROR_INCOMPLETE_INTERFACE when a check fails,
+ // otherwise whatever frameinterpolationCreate() returns.
+
+ // NOTE: the context memory is not zeroed here; the memset below is disabled.
+ //memset(context, 0, sizeof(FfxFrameinterpolationContext));
+
+ // Both arguments are mandatory.
+ FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+ FFX_RETURN_ON_ERROR(contextDescription, FFX_ERROR_INVALID_POINTER);
+
+ // These four backend callbacks must be set before creation can proceed.
+ const auto& backend = contextDescription->backendInterface;
+ FFX_RETURN_ON_ERROR(backend.fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE);
+ FFX_RETURN_ON_ERROR(backend.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE);
+ FFX_RETURN_ON_ERROR(backend.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+ FFX_RETURN_ON_ERROR(backend.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+
+ // A declared scratch buffer must come with a non-zero size.
+ if (backend.scratchBuffer) {
+ FFX_RETURN_ON_ERROR(backend.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE);
+ }
+
+ // The opaque public context has to be large enough to hold the private layout.
+ FFX_STATIC_ASSERT(sizeof(FfxFrameInterpolationContext) >= sizeof(FfxFrameInterpolationContext_Private));
+
+ // View the caller's storage as the private context and create it there.
+ FfxFrameInterpolationContext_Private* const privateContext = (FfxFrameInterpolationContext_Private*)(context);
+
+ return frameinterpolationCreate(privateContext, contextDescription);
+}
+
+FFX_API FfxErrorCode ffxFrameInterpolationContextGetGpuMemoryUsage(FfxFrameInterpolationContext* context, FfxEffectMemoryUsage* vramUsage)
+{
+ // Asks the backend for this effect's GPU memory usage and writes it to vramUsage; a null pointer or a null device is reported as an error.
+ FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+ FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER);
+
+ FfxFrameInterpolationContext_Private* const privateContext = (FfxFrameInterpolationContext_Private*)(context);
+ FFX_RETURN_ON_ERROR(privateContext->device, FFX_ERROR_NULL_DEVICE);
+
+ // The backend's result is forwarded unchanged: FFX_OK on success, its error code otherwise.
+ auto& backend = privateContext->contextDescription.backendInterface;
+ const FfxErrorCode queryResult = backend.fpGetEffectGpuMemoryUsage(&backend, privateContext->effectContextId, vramUsage);
+ return queryResult;
+}
+
+FFX_API FfxErrorCode ffxSharedContextGetGpuMemoryUsage(FfxInterface* backendInterfaceShared, FfxEffectMemoryUsage* vramUsage)
+{
+ // Queries GPU memory usage through the given backend interface; both pointers are required.
+ FFX_RETURN_ON_ERROR(backendInterfaceShared, FFX_ERROR_INVALID_POINTER);
+ FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER);
+
+ // The query is made with effect context id 0; the backend's result code is returned as-is.
+ const FfxErrorCode queryResult = backendInterfaceShared->fpGetEffectGpuMemoryUsage(backendInterfaceShared, 0, vramUsage);
+
+ return queryResult;
+}
+
+FfxErrorCode ffxFrameInterpolationContextDestroy(FfxFrameInterpolationContext* context)
+{
+ // Public teardown entry point: rejects a null context, otherwise releases
+ // everything through frameinterpolationRelease() and returns its result.
+ FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+
+ // The public handle is the storage of the private context.
+ FfxFrameInterpolationContext_Private* const privateContext = (FfxFrameInterpolationContext_Private*)(context);
+
+ // Release pipelines, resources and the backend context.
+ return frameinterpolationRelease(privateContext);
+}
+
+FfxErrorCode ffxFrameInterpolationContextEnqueueRefreshPipelineRequest(FfxFrameInterpolationContext* context)
+{
+ // Marks the context so that pipeline states get rebuilt later; ffxFrameInterpolationDispatch
+ // checks this flag, recreates the pipelines and clears it again.
+ FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+
+ // Only the flag is set here; no pipeline work happens in this call.
+ ((FfxFrameInterpolationContext_Private*)context)->refreshPipelineStates = true;
+
+ return FFX_OK;
+}
+
+static void setupDeviceDepthToViewSpaceDepthParams(FfxFrameInterpolationContext_Private* context, const FfxFrameInterpolationRenderDescription* params, FrameInterpolationConstants* constants)
+{ // Fills constants->deviceToViewDepth[0..3] from the camera planes, vertical FOV and render size in `params` and the depth flags of `context`.
+ const bool bInverted = (context->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED) == FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED;
+ const bool bInfinite = (context->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INFINITE) == FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INFINITE;
+
+ // make sure it has no impact if near and far plane values are swapped in dispatch params
+ // the flags "inverted" and "infinite" will decide what transform to use
+ float fMin = FFX_MINIMUM(params->cameraNear, params->cameraFar);
+ float fMax = FFX_MAXIMUM(params->cameraNear, params->cameraFar);
+
+ if (bInverted) { // with inverted depth the two plane values trade places
+ float tmp = fMin;
+ fMin = fMax;
+ fMax = tmp;
+ }
+ // Layout of the matrix whose elements c, d, e (and later a, b) are referred to below:
+ // a 0 0 0 x
+ // 0 b 0 0 y
+ // 0 0 c d z
+ // 0 0 e 0 1
+
+ const float fQ = fMax / (fMin - fMax);
+ const float d = -1.0f; // for clarity
+ // Both tables are indexed as [bInverted][bInfinite]; the flat initializers fill them row by row.
+ const float matrix_elem_c[2][2] = {
+ fQ, // non reversed, non infinite
+ -1.0f - FLT_EPSILON, // non reversed, infinite
+ fQ, // reversed, non infinite
+ 0.0f + FLT_EPSILON // reversed, infinite
+ };
+
+ const float matrix_elem_e[2][2] = {
+ fQ * fMin, // non reversed, non infinite
+ -fMin - FLT_EPSILON, // non reversed, infinite
+ fQ * fMin, // reversed, non infinite
+ fMax, // reversed, infinite
+ };
+ // [0] = d * c and [1] = e scaled by viewSpaceToMetersFactor, for the active depth mode.
+ constants->deviceToViewDepth[0] = d * matrix_elem_c[bInverted][bInfinite];
+ constants->deviceToViewDepth[1] = matrix_elem_e[bInverted][bInfinite] * params->viewSpaceToMetersFactor;
+
+ // revert x and y coords
+ const float aspect = params->renderSize.width / float(params->renderSize.height); // render width over height, as float
+ const float cotHalfFovY = cosf(0.5f * params->cameraFovAngleVertical) / sinf(0.5f * params->cameraFovAngleVertical); // cotangent of half the vertical FOV
+ const float a = cotHalfFovY / aspect;
+ const float b = cotHalfFovY;
+ // [2] and [3] store the reciprocals of matrix elements a and b.
+ constants->deviceToViewDepth[2] = (1.0f / a);
+ constants->deviceToViewDepth[3] = (1.0f / b);
+
+}
+
+FFX_API bool ffxFrameInterpolationResourceIsNull(FfxResource resource)
+{
+ return !resource.resource; // true exactly when the wrapped resource handle is null
+}
+
+static const float debugBarColorSequence[] = { // RGB triples; ffxFrameInterpolationDispatch steps through them, one triple per dispatch, for the debug bar colour
+ 0.0f, 1.0f, 1.0f, // teal
+ 1.0f, 0.42f, 0.0f, // orange
+ 0.0f, 0.16f, 1.0f, // blue
+ 0.74f, 1.0f, 0.0f, // lime
+ 0.68f, 0.0f, 1.0f, // purple
+ 0.0f, 1.0f, 0.1f, // green
+ 1.0f, 1.0f, 0.48f // bright yellow
+};
+const size_t debugBarColorSequenceLength = sizeof(debugBarColorSequence) / (3 * sizeof(debugBarColorSequence[0])); // number of RGB triples, derived from the table so the wrap-around index can never run past it
+
+static void fsr3FrameInterpolationDebugCheckPrepare(FfxFrameInterpolationContext_Private* context, const FfxFrameInterpolationPrepareDescription* params)
+{
+ // Debug check: prints a warning when all four camera vectors in `params` are byte-for-byte equal to a zero vector.
+ static const FfxFloat32x3 unsetVector = { 0.f,0.f,0.f };
+ const size_t vectorBytes = sizeof(FfxFloat32x3);
+ if (memcmp(params->cameraPosition, unsetVector, vectorBytes) != 0) return; // any non-zero vector means camera info was supplied
+ if (memcmp(params->cameraUp, unsetVector, vectorBytes) != 0) return;
+ if (memcmp(params->cameraRight, unsetVector, vectorBytes) != 0) return;
+ if (memcmp(params->cameraForward, unsetVector, vectorBytes) != 0) return;
+
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"ffxDispatchDescFrameGenerationPrepareCameraInfo needs to be passed as linked struct. This is a required input to FSR3.1.4 and onwards for best quality.");
+}
+
+FFX_API FfxErrorCode ffxFrameInterpolationPrepare(FfxFrameInterpolationContext* context,
+ const FfxFrameInterpolationPrepareDescription* params)
+{ // Prepare pass: stages constants, registers the frame's depth/motion-vector inputs and dilated outputs, clears the reconstructed depth, runs FiReconstructAndDilate on params->commandList, then drops the registrations.
+ FFX_RETURN_ON_ERROR(context && params, FFX_ERROR_INVALID_POINTER); // reject null arguments, as the other public entry points do; otherwise FFX_OK is returned at the end
+ FfxFrameInterpolationContext_Private* contextPrivate = (FfxFrameInterpolationContext_Private*)(context);
+ if ((contextPrivate->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DEBUG_CHECKING) == FFX_FRAMEINTERPOLATION_ENABLE_DEBUG_CHECKING)
+ {
+ fsr3FrameInterpolationDebugCheckPrepare(contextPrivate, params);
+ }
+ // Per-frame constants taken straight from the prepare description.
+ contextPrivate->constants.renderSize[0] = params->renderSize.width;
+ contextPrivate->constants.renderSize[1] = params->renderSize.height;
+ contextPrivate->constants.jitter[0] = params->jitterOffset.x;
+ contextPrivate->constants.jitter[1] = params->jitterOffset.y;
+ // Motion vectors are normalised by the display size when the display-resolution flag is set, by the render size otherwise.
+ const int32_t* motionVectorsTargetSize = (contextPrivate->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS)
+ ? contextPrivate->constants.displaySize
+ : contextPrivate->constants.renderSize;
+ contextPrivate->constants.motionVectorScale[0] = (params->motionVectorScale.x / motionVectorsTargetSize[0]);
+ contextPrivate->constants.motionVectorScale[1] = (params->motionVectorScale.y / motionVectorsTargetSize[1]);
+ // Stage the updated constants into the frame interpolation constant buffer slot.
+ contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc(
+ &contextPrivate->contextDescription.backendInterface,
+ &contextPrivate->constants,
+ sizeof(contextPrivate->constants),
+ &contextPrivate->constantBuffers[FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_IDENTIFIER]);
+ // Depth and motion vectors are mandatory inputs (checked by assertion only).
+ FFX_ASSERT(!ffxFrameInterpolationResourceIsNull(params->depth));
+ FFX_ASSERT(!ffxFrameInterpolationResourceIsNull(params->motionVectors));
+ // Inputs are registered into the SRV table.
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(
+ &contextPrivate->contextDescription.backendInterface,
+ &params->depth,
+ contextPrivate->effectContextId,
+ &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEPTH]);
+
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(
+ &contextPrivate->contextDescription.backendInterface,
+ &params->motionVectors,
+ contextPrivate->effectContextId,
+ &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_MOTION_VECTORS]);
+ // The three shared outputs are registered into the UAV table.
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(
+ &contextPrivate->contextDescription.backendInterface,
+ &params->dilatedDepth,
+ contextPrivate->effectContextId,
+ &contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH]);
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(
+ &contextPrivate->contextDescription.backendInterface,
+ &params->dilatedMotionVectors,
+ contextPrivate->effectContextId,
+ &contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS]);
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(
+ &contextPrivate->contextDescription.backendInterface,
+ &params->reconstructedPrevDepth,
+ contextPrivate->effectContextId,
+ &contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME]);
+
+ // clear estimated depth resources
+ {
+ FfxGpuJobDescription clearJob = {FFX_GPU_JOB_CLEAR_FLOAT};
+ const bool bInverted = (contextPrivate->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED) == FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED;
+ const float clearDepthValue[]{bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f}; // clear value is 0 for inverted depth, 1 otherwise, in all four channels
+ memcpy(clearJob.clearJobDescriptor.color, clearDepthValue, 4 * sizeof(float));
+ wcscpy_s(clearJob.jobLabel, L"Clear Reconstructed Previous Nearest Depth");
+ clearJob.clearJobDescriptor.target = contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME];
+ contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &clearJob);
+ }
+ // Dispatch counts are the render size divided by 8, rounded up (presumably 8x8 thread groups; confirm against the shader).
+ uint32_t renderDispatchSizeX = uint32_t(params->renderSize.width + 7) / 8;
+ uint32_t renderDispatchSizeY = uint32_t(params->renderSize.height + 7) / 8;
+
+ scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiReconstructAndDilate, renderDispatchSizeX, renderDispatchSizeY);
+ // Submit the scheduled clear and dispatch jobs on the caller's command list.
+ contextPrivate->contextDescription.backendInterface.fpExecuteGpuJobs(&contextPrivate->contextDescription.backendInterface, params->commandList, contextPrivate->effectContextId);
+
+ // release dynamic resources
+ contextPrivate->contextDescription.backendInterface.fpUnregisterResources(&contextPrivate->contextDescription.backendInterface,
+ params->commandList,
+ contextPrivate->effectContextId);
+ // Reset the slots that were registered above back to the NULL identifier.
+ contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEPTH] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_MOTION_VECTORS] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL};
+
+ return FFX_OK;
+}
+
+FFX_API FfxErrorCode ffxFrameInterpolationDispatch(FfxFrameInterpolationContext* context, const FfxFrameInterpolationDispatchDescription* params)
+{
+ FfxFrameInterpolationContext_Private* contextPrivate = (FfxFrameInterpolationContext_Private*)(context);
+ const FfxFrameInterpolationRenderDescription* renderDesc = &contextPrivate->renderDescription;
+
+ if (contextPrivate->refreshPipelineStates) {
+
+ createPipelineStates(contextPrivate);
+ contextPrivate->refreshPipelineStates = false;
+ }
+
+ const bool bReset = (contextPrivate->dispatchCount == 0) || params->reset;
+
+ FFX_ASSERT_MESSAGE(!contextPrivate->asyncSupported || bReset || (params->frameID > contextPrivate->previousFrameID),
+ "When async support is enabled, and the reset flag is not set, frame ID must increment in each dispatch");
+
+ // Detect disjoint frameID values
+ const bool bFrameID_Decreased = params->frameID < contextPrivate->previousFrameID;
+ const bool bFrameID_Skipped = (params->frameID - contextPrivate->previousFrameID) > 1;
+ const bool bDisjointFrameID = bFrameID_Decreased || bFrameID_Skipped;
+ contextPrivate->previousFrameID = params->frameID;
+ contextPrivate->dispatchCount++;
+
+ contextPrivate->constants.renderSize[0] = params->renderSize.width;
+ contextPrivate->constants.renderSize[1] = params->renderSize.height;
+ contextPrivate->constants.displaySize[0] = params->displaySize.width;
+ contextPrivate->constants.displaySize[1] = params->displaySize.height;
+ contextPrivate->constants.displaySizeRcp[0] = 1.0f / params->displaySize.width;
+ contextPrivate->constants.displaySizeRcp[1] = 1.0f / params->displaySize.height;
+ contextPrivate->constants.upscalerTargetSize[0] = params->interpolationRect.width;
+ contextPrivate->constants.upscalerTargetSize[1] = params->interpolationRect.height;
+ contextPrivate->constants.Mode = 0;
+ contextPrivate->constants.Reset = bReset || bDisjointFrameID;
+ contextPrivate->constants.deltaTime = params->frameTimeDelta;
+ contextPrivate->constants.HUDLessAttachedFactor = params->currentBackBuffer_HUDLess.resource ? 1 : 0;
+
+ contextPrivate->constants.opticalFlowScale[0] = params->opticalFlowScale.x;
+ contextPrivate->constants.opticalFlowScale[1] = params->opticalFlowScale.y;
+ contextPrivate->constants.opticalFlowBlockSize = params->opticalFlowBlockSize;// displaySize.width / params->opticalFlowBufferSize.width;
+ contextPrivate->constants.dispatchFlags = params->flags;
+
+ contextPrivate->constants.cameraNear = params->cameraNear;
+ contextPrivate->constants.cameraFar = params->cameraFar;
+
+ contextPrivate->constants.interpolationRectBase[0] = params->interpolationRect.left;
+ contextPrivate->constants.interpolationRectBase[1] = params->interpolationRect.top;
+ contextPrivate->constants.interpolationRectSize[0] = params->interpolationRect.width;
+ contextPrivate->constants.interpolationRectSize[1] = params->interpolationRect.height;
+
+ // Debug bar
+ static size_t dbgIdx = 0;
+ memcpy(contextPrivate->constants.debugBarColor, &debugBarColorSequence[dbgIdx * 3], 3 * sizeof(float));
+ dbgIdx = (dbgIdx + 1) % debugBarColorSequenceLength;
+
+ contextPrivate->constants.backBufferTransferFunction = params->backBufferTransferFunction;
+ contextPrivate->constants.minMaxLuminance[0] = params->minMaxLuminance[0];
+ contextPrivate->constants.minMaxLuminance[1] = params->minMaxLuminance[1];
+
+ const float aspectRatio = (float)params->renderSize.width / (float)params->renderSize.height;
+ const float cameraAngleHorizontal = atan(tan(params->cameraFovAngleVertical / 2) * aspectRatio) * 2;
+ contextPrivate->constants.fTanHalfFOV = tanf(cameraAngleHorizontal * 0.5f);
+
+ const bool bUseExternalDistortionFieldResource = !ffxFrameInterpolationResourceIsNull(params->distortionField);
+ if (bUseExternalDistortionFieldResource)
+ {
+ contextPrivate->constants.distortionFieldSize[0] = params->distortionField.description.width;
+ contextPrivate->constants.distortionFieldSize[1] = params->distortionField.description.height;
+ }
+ else
+ {
+ contextPrivate->constants.distortionFieldSize[0] = 1;
+ contextPrivate->constants.distortionFieldSize[1] = 1;
+ }
+
+ contextPrivate->renderDescription.cameraFar = params->cameraFar;
+ contextPrivate->renderDescription.cameraNear = params->cameraNear;
+ contextPrivate->renderDescription.viewSpaceToMetersFactor = (params->viewSpaceToMetersFactor > 0.0f) ? params->viewSpaceToMetersFactor : 1.0f;
+ contextPrivate->renderDescription.cameraFovAngleVertical = params->cameraFovAngleVertical;
+ contextPrivate->renderDescription.renderSize = params->renderSize;
+ contextPrivate->renderDescription.upscaleSize = params->displaySize;
+ setupDeviceDepthToViewSpaceDepthParams(contextPrivate, renderDesc, &contextPrivate->constants);
+
+ contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc(
+ &contextPrivate->contextDescription.backendInterface,
+ &contextPrivate->constants,
+ sizeof(contextPrivate->constants),
+ &contextPrivate->constantBuffers[FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_IDENTIFIER]);
+
+ if (contextPrivate->constants.HUDLessAttachedFactor == 1) {
+
+ FFX_ASSERT_MESSAGE(contextPrivate->contextDescription.previousInterpolationSourceFormat == params->currentBackBuffer_HUDLess.description.format,
+ "Dispatch FI param currentBackBuffer_HUDLess format and Create FG Context's hudlessBackBufferFormat have to be identical. Otherwise, CopyTextureRegion from FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE to FI_PreviousInterpolationSource would fail");
+
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->currentBackBuffer, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PRESENT_BACKBUFFER]);
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->currentBackBuffer_HUDLess, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE]);
+ }
+ else {
+ FFX_ASSERT_MESSAGE(contextPrivate->contextDescription.previousInterpolationSourceFormat == params->currentBackBuffer.description.format,
+ "Dispatch FI param currentBackBuffer format and Create FG Context's backBufferFormat have to be identical. This assert can also be triggered if create FG Context with optional hudlessBackBufferFormat that is different from backBufferFormat and Dispatch FI param's currentBackBuffer_HUDLess is null.");
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->currentBackBuffer, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE]);
+ contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PRESENT_BACKBUFFER] = contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE];
+ }
+
+ if (!ffxFrameInterpolationResourceIsNull(params->dilatedDepth))
+ {
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(
+ &contextPrivate->contextDescription.backendInterface,
+ ¶ms->dilatedDepth,
+ contextPrivate->effectContextId,
+ &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH]);
+ }
+ if (!ffxFrameInterpolationResourceIsNull(params->dilatedMotionVectors))
+ {
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(
+ &contextPrivate->contextDescription.backendInterface,
+ ¶ms->dilatedMotionVectors,
+ contextPrivate->effectContextId,
+ &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS]);
+ }
+ if (!ffxFrameInterpolationResourceIsNull(params->reconstructedPrevDepth))
+ {
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(
+ &contextPrivate->contextDescription.backendInterface,
+ ¶ms->reconstructedPrevDepth,
+ contextPrivate->effectContextId,
+ &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME]);
+ }
+
+ // Register output as SRV and UAV
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->output, contextPrivate->effectContextId, &contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT]);
+ contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT] = contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT];
+
+ // set optical flow buffers
+ if (params->opticalFlowScale.x > 0)
+ {
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->opticalFlowVector, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR]);
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->opticalFlowSceneChangeDetection, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCENE_CHANGE_DETECTION]);
+ }
+ else
+ {
+ contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_CONFIDENCE] = {};
+ contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_GLOBAL_MOTION] = {};
+ contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCENE_CHANGE_DETECTION] = {};
+ contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR] = {};
+ }
+
+ if (bUseExternalDistortionFieldResource)
+ {
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(
+ &contextPrivate->contextDescription.backendInterface,
+ ¶ms->distortionField,
+ contextPrivate->effectContextId,
+ &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISTORTION_FIELD]);
+ }
+ else
+ {
+ contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISTORTION_FIELD] = contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEFAULT_DISTORTION_FIELD];
+ }
+
+ uint32_t displayDispatchSizeX = uint32_t(params->displaySize.width + 7) / 8;
+ uint32_t displayDispatchSizeY = uint32_t(params->displaySize.height + 7) / 8;
+
+ uint32_t renderDispatchSizeX = uint32_t(params->renderSize.width + 7) / 8;
+ uint32_t renderDispatchSizeY = uint32_t(params->renderSize.height + 7) / 8;
+
+ uint32_t opticalFlowDispatchSizeX = uint32_t(params->displaySize.width / float(params->opticalFlowBlockSize) + 7) / 8;
+ uint32_t opticalFlowDispatchSizeY = uint32_t(params->displaySize.height / float(params->opticalFlowBlockSize) + 7) / 8;
+
+ const bool bExecutePreparationPasses = (false == contextPrivate->constants.Reset);
+
+ // Schedule work for the interpolation command list
+ {
+ FfxResourceInternal aliasableResources[] = {
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME],
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X],
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y],
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID],
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X],
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y],
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_MASK],
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK]
+ };
+ for (int i = 0; i < _countof(aliasableResources); ++i)
+ {
+ FfxGpuJobDescription discardJob = {FFX_GPU_JOB_DISCARD};
+ discardJob.discardJobDescriptor.target = aliasableResources[i];
+ contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &discardJob);
+ }
+
+ scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiSetup, renderDispatchSizeX, renderDispatchSizeY);
+
+ // game vector field inpainting pyramid
+ auto scheduleDispatchGameVectorFieldInpaintingPyramid = [&]() {
+ // Auto exposure
+ uint32_t dispatchThreadGroupCountXY[2];
+ uint32_t workGroupOffset[2];
+ uint32_t numWorkGroupsAndMips[2];
+ uint32_t rectInfo[4] = {0, 0, params->renderSize.width, params->renderSize.height};
+ ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo);
+
+ // downsample
+ contextPrivate->inpaintingPyramidContants.numworkGroups = numWorkGroupsAndMips[0];
+ contextPrivate->inpaintingPyramidContants.mips = numWorkGroupsAndMips[1];
+ contextPrivate->inpaintingPyramidContants.workGroupOffset[0] = workGroupOffset[0];
+ contextPrivate->inpaintingPyramidContants.workGroupOffset[1] = workGroupOffset[1];
+
+ contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc(
+ &contextPrivate->contextDescription.backendInterface,
+ &contextPrivate->inpaintingPyramidContants,
+ sizeof(contextPrivate->inpaintingPyramidContants),
+ &contextPrivate->constantBuffers[FFX_FRAMEINTERPOLATION_INPAINTING_PYRAMID_CONSTANTBUFFER_IDENTIFIER]);
+
+ scheduleDispatch(
+ contextPrivate, &contextPrivate->pipelineGameVectorFieldInpaintingPyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]);
+ };
+
+ // only execute FG data preparation passes when reset wasnt triggered
+ if (bExecutePreparationPasses)
+ {
+ // clear estimated depth resources
+ {
+ FfxGpuJobDescription clearJob = {FFX_GPU_JOB_CLEAR_FLOAT};
+
+ const bool bInverted =
+ (contextPrivate->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED) == FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED;
+ const float clearDepthValue[]{bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f};
+ memcpy(clearJob.clearJobDescriptor.color, clearDepthValue, 4 * sizeof(float));
+
+ wcscpy_s(clearJob.jobLabel, L"Clear Reconstructed Depth Interpolated Frame");
+ clearJob.clearJobDescriptor.target =
+ contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME];
+ contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &clearJob);
+ }
+
+ scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiReconstructPreviousDepth, renderDispatchSizeX, renderDispatchSizeY);
+ scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiGameMotionVectorField, renderDispatchSizeX, renderDispatchSizeY);
+
+ scheduleDispatchGameVectorFieldInpaintingPyramid();
+
+ scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiOpticalFlowVectorField, opticalFlowDispatchSizeX, opticalFlowDispatchSizeY);
+
+ scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiDisocclusionMask, renderDispatchSizeX, renderDispatchSizeY);
+ }
+
+ scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiScfi, displayDispatchSizeX, displayDispatchSizeY);
+
+ // inpainting pyramid
+ {
+ // Auto exposure
+ uint32_t dispatchThreadGroupCountXY[2];
+ uint32_t workGroupOffset[2];
+ uint32_t numWorkGroupsAndMips[2];
+ uint32_t rectInfo[4] = { 0, 0, params->displaySize.width, params->displaySize.height };
+ ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo);
+
+ // downsample
+ contextPrivate->inpaintingPyramidContants.numworkGroups = numWorkGroupsAndMips[0];
+ contextPrivate->inpaintingPyramidContants.mips = numWorkGroupsAndMips[1];
+ contextPrivate->inpaintingPyramidContants.workGroupOffset[0] = workGroupOffset[0];
+ contextPrivate->inpaintingPyramidContants.workGroupOffset[1] = workGroupOffset[1];
+
+ contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc(
+ &contextPrivate->contextDescription.backendInterface,
+ &contextPrivate->inpaintingPyramidContants,
+ sizeof(contextPrivate->inpaintingPyramidContants),
+ &contextPrivate->constantBuffers[FFX_FRAMEINTERPOLATION_INPAINTING_PYRAMID_CONSTANTBUFFER_IDENTIFIER]);
+
+ scheduleDispatch(contextPrivate, &contextPrivate->pipelineInpaintingPyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]);
+ }
+
+ scheduleDispatch(contextPrivate, &contextPrivate->pipelineInpainting, displayDispatchSizeX, displayDispatchSizeY);
+
+ if (params->flags & FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW)
+ {
+ scheduleDispatchGameVectorFieldInpaintingPyramid();
+ scheduleDispatch(contextPrivate, &contextPrivate->pipelineDebugView, displayDispatchSizeX, displayDispatchSizeY);
+ }
+
+ // store current buffer
+ {
+ FfxGpuJobDescription copyJobs[] = { {FFX_GPU_JOB_COPY} };
+ FfxResourceInternal copySources[_countof(copyJobs)] = { contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE] };
+ FfxResourceInternal destSources[_countof(copyJobs)] = { contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PREVIOUS_INTERPOLATION_SOURCE] };
+
+ for (int i = 0; i < _countof(copyJobs); ++i)
+ {
+ copyJobs[i].copyJobDescriptor.src = copySources[i];
+ copyJobs[i].copyJobDescriptor.dst = destSources[i];
+ contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, ©Jobs[i]);
+ }
+ }
+
+ // declare internal resources needed
+ struct FfxInternalResourceStates
+ {
+ FfxUInt32 id;
+ FfxResourceUsage usage;
+ };
+ const FfxInternalResourceStates internalSurfaceDesc[] = {
+
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME, FFX_RESOURCE_USAGE_UAV},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X, FFX_RESOURCE_USAGE_UAV},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y, FFX_RESOURCE_USAGE_UAV},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID, FFX_RESOURCE_USAGE_UAV},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS, FFX_RESOURCE_USAGE_UAV},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X, FFX_RESOURCE_USAGE_UAV},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y, FFX_RESOURCE_USAGE_UAV},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PREVIOUS_INTERPOLATION_SOURCE, FFX_RESOURCE_USAGE_UAV},
+ {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK, FFX_RESOURCE_USAGE_UAV},
+ };
+
+ for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) {
+
+ const FfxInternalResourceStates* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex];
+ FfxResourceStates initialState = FFX_RESOURCE_STATE_UNORDERED_ACCESS;
+ if (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_READ_ONLY) initialState = FFX_RESOURCE_STATE_COMPUTE_READ;
+ if (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_RENDERTARGET) initialState = FFX_RESOURCE_STATE_RENDER_TARGET;
+
+ FfxGpuJobDescription barrier = {FFX_GPU_JOB_BARRIER};
+ barrier.barrierDescriptor.resource = contextPrivate->srvResources[currentSurfaceDescription->id];
+ barrier.barrierDescriptor.subResourceID = 0;
+ barrier.barrierDescriptor.newState = (currentSurfaceDescription->id == FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS) ? FFX_RESOURCE_STATE_COPY_DEST : initialState;
+ barrier.barrierDescriptor.barrierType = FFX_BARRIER_TYPE_TRANSITION;
+ contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &barrier);
+ }
+
+ // schedule optical flow and frame interpolation
+ contextPrivate->contextDescription.backendInterface.fpExecuteGpuJobs(&contextPrivate->contextDescription.backendInterface, params->commandList, contextPrivate->effectContextId);
+ }
+
+ // release dynamic resources
+ contextPrivate->contextDescription.backendInterface.fpUnregisterResources(&contextPrivate->contextDescription.backendInterface, params->commandList, contextPrivate->effectContextId);
+
+ return FFX_OK;
+}
+
+FFX_API FfxVersionNumber ffxFrameInterpolationGetEffectVersion() // Reports the version of the Frame Interpolation effect.
+{
+ return FFX_SDK_MAKE_VERSION(FFX_FRAMEINTERPOLATION_VERSION_MAJOR, FFX_FRAMEINTERPOLATION_VERSION_MINOR, FFX_FRAMEINTERPOLATION_VERSION_PATCH); // Built from the three FFX_FRAMEINTERPOLATION_VERSION_* macros.
+}
+
+FFX_API FfxErrorCode ffxFrameInterpolationSetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel) // Thin wrapper around ffxSetPrintMessageCallback.
+{
+ ffxSetPrintMessageCallback(fpMessage, debugLevel); // Both arguments are forwarded unchanged; nothing is validated here.
+ return FFX_OK; // This function has no failure path.
+}
diff --git a/thirdparty/amd-ffx/ffx_frameinterpolation.h b/thirdparty/amd-ffx/ffx_frameinterpolation.h
new file mode 100644
index 000000000000..ed19525e07b5
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_frameinterpolation.h
@@ -0,0 +1,316 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// @defgroup FRAMEINTERPOLATION
+
+#pragma once
+
+// Include the interface for the backend of the Frameinterpolation API.
+#include "ffx_interface.h"
+
+/// FidelityFX Frame Interpolation major version.
+///
+/// @ingroup FRAMEINTERPOLATION
+#define FFX_FRAMEINTERPOLATION_VERSION_MAJOR (1)
+
+/// FidelityFX Frame Interpolation minor version.
+///
+/// @ingroup FRAMEINTERPOLATION
+#define FFX_FRAMEINTERPOLATION_VERSION_MINOR (1)
+
+/// FidelityFX Frame Interpolation patch version.
+///
+/// @ingroup FRAMEINTERPOLATION
+#define FFX_FRAMEINTERPOLATION_VERSION_PATCH (3)
+
+/// FidelityFX Frame Interpolation context count.
+///
+/// Defines the number of internal effect contexts required by Frame Interpolation.
+///
+/// @ingroup ffxFrameInterpolation
+#define FFX_FRAMEINTERPOLATION_CONTEXT_COUNT (1)
+
+/// The size of the context specified in 32-bit values.
+/// Used as the element count of FfxFrameInterpolationContext::data.
+/// @ingroup FRAMEINTERPOLATION
+#define FFX_FRAMEINTERPOLATION_CONTEXT_SIZE (FFX_SDK_DEFAULT_CONTEXT_SIZE)
+
+#if defined(__cplusplus)
+extern "C" {
+#endif // #if defined(__cplusplus)
+
+/// An enumeration of all the passes which constitute the Frame Interpolation
+/// effect.
+///
+/// Frame Interpolation is implemented as a composite of several compute
+/// passes, each computing a key part of the final result. For a more
+/// comprehensive description of each pass, please refer to the Frame
+/// Interpolation reference documentation.
+///
+/// Not every pass necessarily runs on every dispatch. For example,
+/// FFX_FRAMEINTERPOLATION_PASS_DEBUG_VIEW is presumably only needed when
+/// FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW is requested (TODO confirm
+/// against ffx_frameinterpolation.cpp).
+///
+/// FFX_FRAMEINTERPOLATION_PASS_COUNT is not a pass itself; it is the number
+/// of enumerators that precede it.
+///
+/// @ingroup FRAMEINTERPOLATION
+typedef enum FfxFrameInterpolationPass
+{
+ FFX_FRAMEINTERPOLATION_PASS_RECONSTRUCT_AND_DILATE,
+ FFX_FRAMEINTERPOLATION_PASS_SETUP,
+ FFX_FRAMEINTERPOLATION_PASS_RECONSTRUCT_PREV_DEPTH,
+ FFX_FRAMEINTERPOLATION_PASS_GAME_MOTION_VECTOR_FIELD,
+ FFX_FRAMEINTERPOLATION_PASS_OPTICAL_FLOW_VECTOR_FIELD,
+ FFX_FRAMEINTERPOLATION_PASS_DISOCCLUSION_MASK,
+ FFX_FRAMEINTERPOLATION_PASS_INTERPOLATION,
+ FFX_FRAMEINTERPOLATION_PASS_INPAINTING_PYRAMID,
+ FFX_FRAMEINTERPOLATION_PASS_INPAINTING,
+ FFX_FRAMEINTERPOLATION_PASS_GAME_VECTOR_FIELD_INPAINTING_PYRAMID,
+ FFX_FRAMEINTERPOLATION_PASS_DEBUG_VIEW,
+ FFX_FRAMEINTERPOLATION_PASS_COUNT ///< The number of passes performed by FrameInterpolation.
+} FfxFrameInterpolationPass;
+
+// Forward declaration; the full definition appears later in this header.
+struct FfxFrameInterpolationContext;
+
+/// An enumeration of bit flags used when creating a
+/// FfxFrameInterpolationContext. See FfxFrameInterpolationContextDescription::flags.
+///
+/// @ingroup FRAMEINTERPOLATION
+typedef enum FfxFrameInterpolationInitializationFlagBits {
+
+ FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED = (1<<0), ///< A bit indicating that the input depth buffer data provided is inverted [1..0].
+ FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INFINITE = (1<<1), ///< A bit indicating that the input depth buffer data provided is using an infinite far plane.
+ FFX_FRAMEINTERPOLATION_ENABLE_TEXTURE1D_USAGE = (1<<2), ///< A bit indicating that the backend should use 1D textures.
+ FFX_FRAMEINTERPOLATION_ENABLE_HDR_COLOR_INPUT = (1<<3), ///< A bit indicating that HDR values are present in the imaging pipeline.
+ FFX_FRAMEINTERPOLATION_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS = (1<<4), ///< A bit indicating if the motion vectors are rendered at display resolution.
+ FFX_FRAMEINTERPOLATION_ENABLE_JITTER_MOTION_VECTORS = (1<<5), // NOTE(review): undocumented upstream; presumably indicates that the motion vectors include camera jitter - confirm.
+ FFX_FRAMEINTERPOLATION_ENABLE_ASYNC_SUPPORT = (1<<6), // NOTE(review): undocumented upstream; FfxFrameInterpolationDispatchDescription::frameID mentions async support - confirm exact meaning.
+ FFX_FRAMEINTERPOLATION_ENABLE_DEBUG_CHECKING = (1<<7), ///< A bit indicating that the runtime should check some API values and report issues.
+} FfxFrameInterpolationInitializationFlagBits;
+
+/// A structure encapsulating the parameters required to initialize
+/// FidelityFX Frame Interpolation.
+///
+/// @ingroup FRAMEINTERPOLATION
+typedef struct FfxFrameInterpolationContextDescription {
+ uint32_t flags; ///< A collection of FfxFrameInterpolationInitializationFlagBits.
+ FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at.
+ FfxDimensions2D displaySize; ///< The size of the presentation resolution.
+ FfxSurfaceFormat backBufferFormat; ///< The format of the backbuffer.
+ FfxSurfaceFormat previousInterpolationSourceFormat; ///< The format of the texture that will store the interpolation source for the next frame. May differ from the backbuffer format, especially when a HUD-less interpolation source is used.
+ FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FidelityFX SDK.
+} FfxFrameInterpolationContextDescription;
+
+/// A structure encapsulating the resource descriptions for shared resources for this effect.
+/// This is the type taken by ffxFrameInterpolationGetSharedResourceDescriptions.
+/// @ingroup FRAMEINTERPOLATION
+typedef struct FfxFrameInterpolationSharedResourceDescriptions
+{
+ FfxCreateResourceDescription reconstructedPrevNearestDepth; ///< The FfxCreateResourceDescription for allocating the reconstructedPrevNearestDepth shared resource.
+ FfxCreateResourceDescription dilatedDepth; ///< The FfxCreateResourceDescription for allocating the dilatedDepth shared resource.
+ FfxCreateResourceDescription dilatedMotionVectors; ///< The FfxCreateResourceDescription for allocating the dilatedMotionVectors shared resource.
+} FfxFrameInterpolationSharedResourceDescriptions;
+
+/// A structure encapsulating the FidelityFX Frame Interpolation context.
+///
+/// This sets up an object which contains all persistent internal data and
+/// resources that are required by Frame Interpolation.
+///
+/// The FfxFrameInterpolationContext object should have a lifetime
+/// matching your use of Frame Interpolation. Before destroying the context care
+/// should be taken to ensure the GPU is not accessing the resources created or
+/// used by Frame Interpolation. It is therefore recommended that the GPU is
+/// idle before destroying the context.
+///
+/// @ingroup FRAMEINTERPOLATION
+typedef struct FfxFrameInterpolationContext
+{
+ uint32_t data[FFX_FRAMEINTERPOLATION_CONTEXT_SIZE]; ///< An opaque set of uint32_t which contain the data for the context.
+} FfxFrameInterpolationContext;
+
+
+/// Create a FidelityFX Frame Interpolation context from the parameters
+/// programmed to the FfxFrameInterpolationContextDescription structure.
+///
+/// The context structure is the main object used to interact with the Frame
+/// Interpolation API, and is responsible for the management of the internal
+/// resources used by the effect. When this API is called, multiple calls will be
+/// made via the pointers contained in the backendInterface structure.
+/// These callbacks will attempt to retrieve the device capabilities, and
+/// create the internal resources, and pipelines required by the effect's
+/// frame-to-frame function. Depending on the precise configuration used when
+/// creating the FfxFrameInterpolationContext a different set of resources and
+/// pipelines might be requested via the callback functions.
+///
+/// The flags included in the flags field of
+/// FfxFrameInterpolationContextDescription should match the configuration of
+/// your application as well as the intended use of Frame Interpolation. It is
+/// important that these flags are set correctly (as well as a correctly programmed
+/// FfxFrameInterpolationDispatchDescription) to ensure correct operation. It is
+/// recommended to consult the overview documentation for further details on
+/// how Frame Interpolation should be integrated into an application.
+///
+/// When the FfxFrameInterpolationContext is created, you should use the
+/// ffxFrameInterpolationDispatch function each frame where frame
+/// interpolation should be applied. See
+/// FfxFrameInterpolationDispatchDescription for its parameters.
+///
+/// The FfxFrameInterpolationContext should be destroyed when use of it is
+/// completed, typically when an application is unloaded or frame interpolation
+/// is disabled by a user. To destroy the context you should call
+/// ffxFrameInterpolationContextDestroy.
+///
+/// @param [out] context A pointer to a FfxFrameInterpolationContext structure to populate.
+/// @param [in] contextDescription A pointer to a FfxFrameInterpolationContextDescription structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context or contextDescription was NULL.
+/// @retval
+/// FFX_ERROR_INCOMPLETE_INTERFACE The operation failed because the FfxFrameInterpolationContextDescription.backendInterface was not fully specified.
+/// @retval
+/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
+///
+/// @ingroup FRAMEINTERPOLATION
+FFX_API FfxErrorCode ffxFrameInterpolationContextCreate(FfxFrameInterpolationContext* context, FfxFrameInterpolationContextDescription* contextDescription);
+
+FFX_API FfxErrorCode ffxFrameInterpolationContextGetGpuMemoryUsage(FfxFrameInterpolationContext* pContext, FfxEffectMemoryUsage* vramUsage); // NOTE(review): undocumented upstream; presumably reports the GPU memory usage of pContext through vramUsage - confirm.
+
+FFX_API FfxErrorCode ffxFrameInterpolationGetSharedResourceDescriptions(FfxFrameInterpolationContext* pContext, FfxFrameInterpolationSharedResourceDescriptions* SharedResources); // NOTE(review): undocumented upstream; presumably fills SharedResources with the creation descriptions of the shared resources - confirm.
+
+FFX_API FfxErrorCode ffxSharedContextGetGpuMemoryUsage(FfxInterface* backendInterfaceShared, FfxEffectMemoryUsage* vramUsage); // NOTE(review): undocumented upstream; takes a backend interface rather than a context, presumably to report memory used by shared resources - confirm.
+
+typedef struct FfxFrameInterpolationPrepareDescription // Parameter block taken by ffxFrameInterpolationPrepare.
+{
+ uint32_t flags; ///< combination of FfxFrameInterpolationDispatchFlags
+ FfxCommandList commandList; ///< The FfxCommandList to record frame interpolation commands into.
+ FfxDimensions2D renderSize; ///< The dimensions used to render game content, dilatedDepth, dilatedMotionVectors are expected to be of this size.
+ FfxFloatCoords2D jitterOffset; ///< The subpixel jitter offset applied to the camera.
+ FfxFloatCoords2D motionVectorScale; ///< The scale factor to apply to motion vectors.
+
+ float frameTimeDelta; ///< The time elapsed since the last frame (presumably milliseconds, as in FfxFrameInterpolationDispatchDescription - TODO confirm).
+ float cameraNear; ///< The distance to the near plane of the camera.
+ float cameraFar; ///< The distance to the far plane of the camera.
+ float viewSpaceToMetersFactor; ///< The unit to scale view space coordinates to meters.
+ float cameraFovAngleVertical; ///< The camera angle field of view in the vertical direction (presumably radians, as in FfxFrameInterpolationDispatchDescription - TODO confirm).
+
+ FfxResource depth; ///< The depth buffer data
+ FfxResource motionVectors; ///< The motion vector data
+ uint64_t frameID; ///< Frame identifier (presumably the same value passed as FfxFrameInterpolationDispatchDescription::frameID - TODO confirm).
+
+ FfxResource dilatedDepth; ///< The dilated depth buffer data
+ FfxResource dilatedMotionVectors; ///< The dilated motion vector data
+ FfxResource reconstructedPrevDepth; ///< The reconstructed depth buffer data
+
+ FfxFloat32x3 cameraPosition; ///< The camera position in world space
+ FfxFloat32x3 cameraUp; ///< The camera up normalized vector in world space.
+ FfxFloat32x3 cameraRight; ///< The camera right normalized vector in world space.
+ FfxFloat32x3 cameraForward; ///< The camera forward normalized vector in world space.
+
+} FfxFrameInterpolationPrepareDescription;
+
+FFX_API FfxErrorCode ffxFrameInterpolationPrepare(FfxFrameInterpolationContext* context, const FfxFrameInterpolationPrepareDescription* params); // Inputs are described field by field in FfxFrameInterpolationPrepareDescription. NOTE(review): undocumented upstream - confirm which of the listed resources it writes.
+
+typedef enum FfxFrameInterpolationDispatchFlags // Bit flags combined into the flags field of the prepare and dispatch descriptions.
+{
+ FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES = (1 << 0), ///< A bit indicating that the debug tear lines will be drawn to the interpolated output.
+ FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_RESET_INDICATORS = (1 << 1), ///< A bit indicating that the debug reset indicators will be drawn to the generated output.
+ FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW = (1 << 2), ///< A bit indicating that the interpolated output resource will contain debug views with relevant information.
+ FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_PACING_LINES = (1 << 3), ///< A bit indicating that the debug pacing lines will be drawn to the generated output.
+ FFX_FRAMEINTERPOLATION_DISPATCH_RESERVED_1 = (1 << 4), ///< Reserved bit.
+ FFX_FRAMEINTERPOLATION_DISPATCH_RESERVED_2 = (1 << 5), ///< Reserved bit.
+} FfxFrameInterpolationDispatchFlags;
+
+typedef struct FfxFrameInterpolationDispatchDescription { // Parameter block taken by ffxFrameInterpolationDispatch.
+
+ uint32_t flags; ///< combination of FfxFrameInterpolationDispatchFlags
+ FfxCommandList commandList; ///< The FfxCommandList to record frame interpolation commands into.
+ FfxDimensions2D displaySize; ///< The destination output dimensions
+ FfxDimensions2D renderSize; ///< The dimensions used to render game content, dilatedDepth, dilatedMotionVectors are expected to be of this size.
+ FfxResource currentBackBuffer; ///< The current presentation color, if currentBackBuffer_HUDLess is not used, this will be used as interpolation source data.
+ FfxResource currentBackBuffer_HUDLess; ///< The current presentation color without HUD content, when used, it will be used as interpolation source data.
+ FfxResource output; ///< The output resource where to store the interpolated result.
+
+ FfxRect2D interpolationRect; ///< The area of the backbuffer that should be used for interpolation in case only a part of the screen is used e.g. due to movie bars
+
+ FfxResource opticalFlowVector; ///< The optical flow motion vectors (see example computation in the FfxOpticalFlow effect)
+ FfxResource opticalFlowSceneChangeDetection; ///< The optical flow scene change detection data
+ FfxDimensions2D opticalFlowBufferSize; ///< The optical flow motion vector resource dimensions
+ FfxFloatCoords2D opticalFlowScale; ///< The optical flow motion vector scale factor, used to scale resource values into [0.0,1.0] range.
+ int opticalFlowBlockSize; ///< The optical flow block dimension size
+
+ float cameraNear; ///< The distance to the near plane of the camera.
+ float cameraFar; ///< The distance to the far plane of the camera. This is only used in case of non infinite depth.
+ float cameraFovAngleVertical; ///< The camera angle field of view in the vertical direction (expressed in radians).
+ float viewSpaceToMetersFactor; ///< The unit to scale view space coordinates to meters.
+
+ float frameTimeDelta; ///< The time elapsed since the last frame (expressed in milliseconds).
+ bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously.
+
+ FfxBackbufferTransferFunction backBufferTransferFunction; ///< The transfer function used to convert interpolation source color data to linear RGB.
+ float minMaxLuminance[2]; ///< Min and max luminance values, used when converting HDR colors to linear RGB
+ uint64_t frameID; ///< Identifier used to select internal resources when async support is enabled. Must increment by exactly one (1) for each frame. Any non-exactly-one difference will reset the frame generation logic.
+
+ FfxResource dilatedDepth; ///< The dilated depth buffer data
+ FfxResource dilatedMotionVectors; ///< The dilated motion vector data
+ FfxResource reconstructedPrevDepth; ///< The reconstructed depth buffer data
+
+ FfxResource distortionField; ///< A resource containing distortion offset data used when distortion post effects are enabled.
+} FfxFrameInterpolationDispatchDescription;
+
+FFX_API FfxErrorCode ffxFrameInterpolationDispatch(FfxFrameInterpolationContext* context, const FfxFrameInterpolationDispatchDescription* params); // Inputs, including the command list that frame interpolation commands are recorded into, are described in FfxFrameInterpolationDispatchDescription.
+
+/// Destroy the FidelityFX Frame Interpolation context.
+///
+/// @param [out] context A pointer to a FfxFrameInterpolationContext structure to destroy.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because context was NULL.
+///
+/// @ingroup FRAMEINTERPOLATION
+FFX_API FfxErrorCode ffxFrameInterpolationContextDestroy(FfxFrameInterpolationContext* context);
+
+/// Queries the effect version number.
+///
+/// @returns
+/// The effect version, composed from FFX_FRAMEINTERPOLATION_VERSION_MAJOR, FFX_FRAMEINTERPOLATION_VERSION_MINOR and FFX_FRAMEINTERPOLATION_VERSION_PATCH.
+///
+/// @ingroup FRAMEINTERPOLATION
+FFX_API FfxVersionNumber ffxFrameInterpolationGetEffectVersion();
+
+/// Set global debug message settings.
+/// Both fpMessage and debugLevel are forwarded to ffxSetPrintMessageCallback.
+/// @retval
+/// FFX_OK The operation completed successfully (this is the only value returned).
+///
+/// @ingroup FRAMEINTERPOLATION
+FFX_API FfxErrorCode ffxFrameInterpolationSetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel);
+
+#if defined(__cplusplus)
+}
+#endif // #if defined(__cplusplus)
diff --git a/thirdparty/amd-ffx/ffx_frameinterpolation_private.h b/thirdparty/amd-ffx/ffx_frameinterpolation_private.h
new file mode 100644
index 000000000000..a338f7f1924f
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_frameinterpolation_private.h
@@ -0,0 +1,147 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "gpu/frameinterpolation/ffx_frameinterpolation_resources.h"
+
+/// An enumeration of all the permutations that can be passed to the FSR3 frame interpolation shaders.
+///
+/// Frame interpolation features are organized through a set of pre-defined compile
+/// permutation options that need to be specified. Which shader blob
+/// is returned for pipeline creation will be determined by what combination
+/// of shader permutations are enabled.
+///
+/// Every enumerator occupies its own bit, so options can be combined with bitwise OR.
+///
+/// @ingroup FRAMEINTERPOLATION
+typedef enum FrameInterpolationShaderPermutationOptions
+{
+ FRAMEINTERPOLATION_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS = (1 << 0),
+ FRAMEINTERPOLATION_SHADER_PERMUTATION_JITTER_MOTION_VECTORS = (1 << 1),
+ FRAMEINTERPOLATION_SHADER_PERMUTATION_DEPTH_INVERTED = (1 << 2), ///< Indicates input resources were generated with inverted depth
+ FRAMEINTERPOLATION_SHADER_PERMUTATION_FORCE_WAVE64 = (1 << 3), ///< doesn't map to a define, selects different table
+ FRAMEINTERPOLATION_SHADER_PERMUTATION_ALLOW_FP16 = (1 << 4), ///< Enables fast math computations where possible
+} FrameInterpolationShaderPermutationOptions;
+
+// Constant data consumed by the frame interpolation passes.
+//
+// Members are declared in groups of four 32-bit values (16 bytes per group, assuming
+// a 32-bit int); _pad1 exists only to complete its group.
+// NOTE(review): presumably this mirrors the shader-side constant buffer layout, so
+// members must not be reordered or resized - confirm against the shader declaration.
+typedef struct FrameInterpolationConstants
+{
+ int32_t renderSize[2];
+ int32_t displaySize[2];
+
+ float displaySizeRcp[2];
+ float cameraNear;
+ float cameraFar;
+
+ int32_t upscalerTargetSize[2]; // how is that different from display size?
+ int Mode;
+ int Reset;
+
+ float deviceToViewDepth[4];
+
+ float deltaTime;
+ int HUDLessAttachedFactor;
+ int32_t distortionFieldSize[2];
+
+ float opticalFlowScale[2];
+ int32_t opticalFlowBlockSize;
+ uint32_t dispatchFlags;
+
+ int32_t maxRenderSize[2];
+ int opticalFlowHalfResMode;
+ int numInstances;
+
+ int32_t interpolationRectBase[2];
+ int32_t interpolationRectSize[2];
+
+ float debugBarColor[3];
+ uint32_t backBufferTransferFunction;
+
+ float minMaxLuminance[2];
+ float fTanHalfFOV;
+ float _pad1; // padding only: completes the four-value group
+
+ float jitter[2];
+ float motionVectorScale[2];
+} FrameInterpolationConstants;
+
+// Constant data for the inpainting pyramid pass: four 32-bit values (16 bytes).
+// NOTE(review): field meanings are inferred from their names only (mip count,
+// work group count, work group offset) - confirm against the pass that fills them.
+typedef struct InpaintingPyramidConstants {
+
+ uint32_t mips;
+ uint32_t numworkGroups;
+ uint32_t workGroupOffset[2];
+} InpaintingPyramidConstants;
+
+struct FfxDeviceCapabilities;
+struct FfxPipelineState;
+struct FfxResource;
+
+// Render-side parameters kept by the private frame interpolation context
+// (stored as FfxFrameInterpolationContext_Private::renderDescription).
+// NOTE(review): units of the camera fields are not visible here; the name
+// cameraFovAngleVertical suggests an angle, presumably in radians - confirm.
+typedef struct FfxFrameInterpolationRenderDescription
+{
+ FfxDimensions2D renderSize;
+ FfxDimensions2D upscaleSize;
+
+ float cameraNear;
+ float cameraFar;
+ float cameraFovAngleVertical;
+ float viewSpaceToMetersFactor;
+
+ FfxFloatCoords2D motionVectorScale; ///< The scale factor to apply to motion vectors.
+} FfxFrameInterpolationRenderDescription;
+
+// FfxFrameInterpolationContext_Private
+// The private implementation of the frame interpolation context.
+typedef struct FfxFrameInterpolationContext_Private {
+
+ FfxFrameInterpolationContextDescription contextDescription;
+ FfxUInt32 effectContextId;
+ FfxFrameInterpolationRenderDescription renderDescription;
+ FrameInterpolationConstants constants;
+ InpaintingPyramidConstants inpaintingPyramidContants; // (sic) member name is spelled this way
+ FfxDevice device;
+ FfxDeviceCapabilities deviceCapabilities;
+
+ // FrameInterpolation Pipelines
+ FfxPipelineState pipelineFiReconstructAndDilate;
+ FfxPipelineState pipelineFiSetup;
+ FfxPipelineState pipelineFiReconstructPreviousDepth;
+ FfxPipelineState pipelineFiGameMotionVectorField;
+ FfxPipelineState pipelineFiOpticalFlowVectorField;
+ FfxPipelineState pipelineFiDisocclusionMask;
+ FfxPipelineState pipelineFiScfi;
+ FfxPipelineState pipelineInpaintingPyramid;
+ FfxPipelineState pipelineInpainting;
+ FfxPipelineState pipelineGameVectorFieldInpaintingPyramid;
+ FfxPipelineState pipelineDebugView;
+
+ FfxConstantBuffer constantBuffers[FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_COUNT];
+
+ // 2 arrays of resources, as one resource identifier may use different resources when bound as SRV vs when bound as UAV
+ // (the original comment cited FFX_FSR3_RESOURCE_IDENTIFIER_LOCK_STATUS as the example)
+ FfxResourceInternal srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT];
+ FfxResourceInternal uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT];
+
+ bool firstExecution;
+ bool refreshPipelineStates;
+
+ bool asyncSupported;
+ uint64_t previousFrameID;
+ uint64_t dispatchCount;
+
+} FfxFrameInterpolationContext_Private;
diff --git a/thirdparty/amd-ffx/ffx_fsr1.cpp b/thirdparty/amd-ffx/ffx_fsr1.cpp
new file mode 100644
index 000000000000..e638b4814c1c
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_fsr1.cpp
@@ -0,0 +1,515 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include // for memset
+#include // for _countof
+#include // for fabs, abs, sinf, sqrt, etc.
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wsign-compare"
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4505)
+#endif
+
+#include "ffx_fsr1.h"
+#include "gpu/ffx_core.h"
+#include "gpu/fsr1/ffx_fsr1.h"
+#include "ffx_object_management.h"
+
+#include "ffx_fsr1_private.h"
+
+// lists to map shader resource bindpoint name to resource identifier
+//
+// One entry of such a list: `name` is the bindpoint name to match (wide string,
+// at most 63 characters plus terminator) and `index` is the identifier that is
+// written into the pipeline binding when the name matches.
+typedef struct ResourceBinding
+{
+ uint32_t index;
+ wchar_t name[64];
+}ResourceBinding;
+
+// Bindpoint names of SRV textures ("r_" prefix) and the FSR1 resource identifier each maps to.
+static const ResourceBinding srvTextureBindingTable[] =
+{
+ {FFX_FSR1_RESOURCE_IDENTIFIER_INPUT_COLOR, L"r_input_color"},
+ {FFX_FSR1_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, L"r_internal_upscaled_color"},
+ {FFX_FSR1_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT, L"r_upscaled_output" },
+};
+
+// Bindpoint names of UAV textures ("rw_" prefix) and the FSR1 resource identifier each maps to.
+static const ResourceBinding uavTextureBindingTable[] =
+{
+ {FFX_FSR1_RESOURCE_IDENTIFIER_INPUT_COLOR, L"rw_input_color"},
+ {FFX_FSR1_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, L"rw_internal_upscaled_color"},
+ {FFX_FSR1_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT, L"rw_upscaled_output"},
+};
+
+// Bindpoint names of constant buffers and the FSR1 constant buffer identifier each maps to.
+static const ResourceBinding cbResourceBindingTable[] =
+{
+ {FFX_FSR1_CONSTANTBUFFER_IDENTIFIER_FSR1, L"cbFSR1"},
+};
+
+// Resolves the binding names stored in a pipeline to FSR1 resource identifiers.
+//
+// Each SRV texture, UAV texture and constant buffer binding of inoutPipeline is
+// looked up by name in the matching static binding table, and its
+// resourceIdentifier is overwritten with the index stored in that table.
+//
+// Returns FFX_ERROR_INVALID_ARGUMENT as soon as a binding name has no table entry
+// (bindings after it are left untouched), FFX_OK when every binding was resolved.
+static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
+{
+    // SRV textures
+    for (uint32_t bindingIdx = 0; bindingIdx < inoutPipeline->srvTextureCount; ++bindingIdx)
+    {
+        int32_t tableIdx = 0;
+        while (tableIdx < _countof(srvTextureBindingTable) &&
+               0 != wcscmp(srvTextureBindingTable[tableIdx].name, inoutPipeline->srvTextureBindings[bindingIdx].name))
+        {
+            ++tableIdx;
+        }
+
+        if (tableIdx == _countof(srvTextureBindingTable))
+            return FFX_ERROR_INVALID_ARGUMENT;
+
+        inoutPipeline->srvTextureBindings[bindingIdx].resourceIdentifier = srvTextureBindingTable[tableIdx].index;
+    }
+
+    // UAV textures
+    for (uint32_t bindingIdx = 0; bindingIdx < inoutPipeline->uavTextureCount; ++bindingIdx)
+    {
+        int32_t tableIdx = 0;
+        while (tableIdx < _countof(uavTextureBindingTable) &&
+               0 != wcscmp(uavTextureBindingTable[tableIdx].name, inoutPipeline->uavTextureBindings[bindingIdx].name))
+        {
+            ++tableIdx;
+        }
+
+        if (tableIdx == _countof(uavTextureBindingTable))
+            return FFX_ERROR_INVALID_ARGUMENT;
+
+        inoutPipeline->uavTextureBindings[bindingIdx].resourceIdentifier = uavTextureBindingTable[tableIdx].index;
+    }
+
+    // Constant buffers
+    for (uint32_t bindingIdx = 0; bindingIdx < inoutPipeline->constCount; ++bindingIdx)
+    {
+        int32_t tableIdx = 0;
+        while (tableIdx < _countof(cbResourceBindingTable) &&
+               0 != wcscmp(cbResourceBindingTable[tableIdx].name, inoutPipeline->constantBufferBindings[bindingIdx].name))
+        {
+            ++tableIdx;
+        }
+
+        if (tableIdx == _countof(cbResourceBindingTable))
+            return FFX_ERROR_INVALID_ARGUMENT;
+
+        inoutPipeline->constantBufferBindings[bindingIdx].resourceIdentifier = cbResourceBindingTable[tableIdx].index;
+    }
+
+    return FFX_OK;
+}
+
+// Builds the shader permutation bit mask for one FSR1 pass.
+//
+// contextFlags  FFX_FSR1_* context creation flags.
+// passId        The pass the permutation is selected for.
+// fp16          Whether FP16 may be used at all.
+// force64       Whether the wave64 permutation should be selected.
+//
+// Returns the OR of the FSR1_SHADER_PERMUTATION_* bits that apply.
+static uint32_t getPipelinePermutationFlags(uint32_t contextFlags, FfxFsr1Pass passId, bool fp16, bool force64)
+{
+    uint32_t permutation = 0;
+
+    if (contextFlags & FFX_FSR1_RCAS_PASSTHROUGH_ALPHA)
+        permutation |= FSR1_SHADER_PERMUTATION_RCAS_PASSTHROUGH_ALPHA;
+
+    if (contextFlags & FFX_FSR1_ENABLE_SRGB_CONVERSIONS)
+        permutation |= FSR1_SHADER_PERMUTATION_SRGB_CONVERSIONS;
+
+    // Every pass other than plain EASU gets the APPLY_RCAS permutation.
+    if (passId != FFX_FSR1_PASS_EASU)
+        permutation |= FSR1_SHADER_PERMUTATION_APPLY_RCAS;
+
+    if (force64)
+        permutation |= FSR1_SHADER_PERMUTATION_FORCE_WAVE64;
+
+#if defined(_GAMING_XBOX_SCARLETT)
+    // Never got reports about NaNs on Xbox
+    const bool allowFp16 = fp16;
+#else
+    // Some NaNs have been observed on other hardware during Rcas with FP16
+    const bool allowFp16 = fp16 && (passId != FFX_FSR1_PASS_RCAS);
+#endif
+    if (allowFp16)
+        permutation |= FSR1_SHADER_PERMUTATION_ALLOW_FP16;
+
+    return permutation;
+}
+
+// Creates the three FSR1 compute pipelines (EASU, EASU_RCAS, RCAS) through the
+// backend interface and then resolves their resource bindings by name.
+//
+// Returns the backend's error code if creating any pipeline fails, FFX_OK otherwise.
+// The result of patchResourceBindings is intentionally not propagated.
+static FfxErrorCode createPipelineStates(FfxFsr1Context_Private* context)
+{
+    FFX_ASSERT(context);
+
+    auto& backend = context->contextDescription.backendInterface;
+
+    // Single sampler and single root constant block, shared by all three pipelines.
+    FfxSamplerDescription samplerDesc = { FFX_FILTER_TYPE_MINMAGMIP_LINEAR, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE };
+    FfxRootConstantDescription rootConstantDesc = { sizeof(Fsr1Constants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE };
+
+    FfxPipelineDescription pipelineDescription = {};
+    pipelineDescription.contextFlags            = context->contextDescription.flags;
+    pipelineDescription.samplerCount            = 1;
+    pipelineDescription.samplers                = &samplerDesc;
+    pipelineDescription.rootConstantBufferCount = 1;
+    pipelineDescription.rootConstants           = &rootConstantDesc;
+
+    // Query device capabilities
+    FfxDeviceCapabilities capabilities;
+    backend.fpGetDeviceCapabilities(&backend, &capabilities);
+
+    // Inputs for the permutation selection: wave64 can only be forced when 64 lies
+    // inside the supported lane count range and shader model 6.6 is available.
+    const bool     haveShaderModel66 = capabilities.maximumSupportedShaderModel >= FFX_SHADER_MODEL_6_6;
+    const bool     supportedFP16     = capabilities.fp16Supported;
+    const uint32_t waveLaneCountMin  = capabilities.waveLaneCountMin;
+    const uint32_t waveLaneCountMax  = capabilities.waveLaneCountMax;
+    const bool     canForceWave64    = (waveLaneCountMin <= 64 && waveLaneCountMax >= 64) ? haveShaderModel66 : false;
+
+    const uint32_t contextFlags = context->contextDescription.flags;
+
+    // Set up pipeline descriptors (basically RootSignature and binding)
+    wcscpy_s(pipelineDescription.name, L"FSR1-EASU");
+    FFX_VALIDATE(backend.fpCreatePipeline(&backend, FFX_EFFECT_FSR1, FFX_FSR1_PASS_EASU,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR1_PASS_EASU, supportedFP16, canForceWave64),
+        &pipelineDescription, context->effectContextId, &context->pipelineEASU));
+
+    wcscpy_s(pipelineDescription.name, L"FSR1-EASU_RCAS");
+    FFX_VALIDATE(backend.fpCreatePipeline(&backend, FFX_EFFECT_FSR1, FFX_FSR1_PASS_EASU_RCAS,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR1_PASS_EASU_RCAS, supportedFP16, canForceWave64),
+        &pipelineDescription, context->effectContextId, &context->pipelineEASU_RCAS));
+
+    wcscpy_s(pipelineDescription.name, L"FSR1-RCAS");
+    FFX_VALIDATE(backend.fpCreatePipeline(&backend, FFX_EFFECT_FSR1, FFX_FSR1_PASS_RCAS,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR1_PASS_RCAS, supportedFP16, canForceWave64),
+        &pipelineDescription, context->effectContextId, &context->pipelineRCAS));
+
+    // For each pipeline: re-route/fix-up IDs based on names (result deliberately discarded)
+    (void)patchResourceBindings(&context->pipelineEASU);
+    (void)patchResourceBindings(&context->pipelineEASU_RCAS);
+    (void)patchResourceBindings(&context->pipelineRCAS);
+
+    return FFX_OK;
+}
+
+// Fills in a compute job for the given pipeline and hands it to the backend scheduler.
+//
+// SRV and UAV textures are taken from the context's srvResources / uavResources
+// arrays using the resource identifiers stored in the pipeline bindings; constant
+// buffer slot 0 receives context->constantBuffer. The job is dispatched with
+// dimensions (dispatchX, dispatchY, 1). The dispatch description parameter is unused.
+static void scheduleDispatch(FfxFsr1Context_Private* context, const FfxFsr1DispatchDescription*, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
+{
+    FfxGpuJobDescription dispatchJob = {FFX_GPU_JOB_COMPUTE};
+    wcscpy_s(dispatchJob.jobLabel, pipeline->name);
+
+    auto& job = dispatchJob.computeJobDescriptor;
+
+    // Shader resource views
+    for (uint32_t srv = 0; srv < pipeline->srvTextureCount; ++srv)
+    {
+        const uint32_t resourceId = pipeline->srvTextureBindings[srv].resourceIdentifier;
+        job.srvTextures[srv].resource = context->srvResources[resourceId];
+#ifdef FFX_DEBUG
+        wcscpy_s(job.srvTextures[srv].name, pipeline->srvTextureBindings[srv].name);
+#endif
+    }
+
+    // Unordered access views (always mip 0)
+    for (uint32_t uav = 0; uav < pipeline->uavTextureCount; ++uav)
+    {
+        const uint32_t resourceId = pipeline->uavTextureBindings[uav].resourceIdentifier;
+#ifdef FFX_DEBUG
+        wcscpy_s(job.uavTextures[uav].name, pipeline->uavTextureBindings[uav].name);
+#endif
+        job.uavTextures[uav].resource = context->uavResources[resourceId];
+        job.uavTextures[uav].mip      = 0;
+    }
+
+    job.dimensions[0] = dispatchX;
+    job.dimensions[1] = dispatchY;
+    job.dimensions[2] = 1;
+    job.pipeline      = *pipeline;
+
+#ifdef FFX_DEBUG
+    wcscpy_s(job.cbNames[0], pipeline->constantBufferBindings[0].name);
+#endif
+    job.cbs[0] = context->constantBuffer;
+
+    context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &dispatchJob);
+}
+
+// Records and executes the FSR1 work for one frame.
+//
+// Registers the caller's color and output resources, stages the EASU constants and
+// schedules the upscale pass, optionally stages the RCAS constants and schedules the
+// sharpening pass, then asks the backend to execute the scheduled jobs on
+// params->commandList and to unregister the per-frame resources.
+//
+// Always returns FFX_OK; results of the backend calls are not inspected here.
+static FfxErrorCode fsr1Dispatch(FfxFsr1Context_Private* context, const FfxFsr1DispatchDescription* params)
+{
+ // take a short cut to the command list
+ FfxCommandList commandList = params->commandList;
+
+ // Register resources for frame
+ // (input color goes into the SRV table, upscaled output into the UAV table)
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, ¶ms->color, context->effectContextId, &context->srvResources[FFX_FSR1_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, ¶ms->output, context->effectContextId, &context->uavResources[FFX_FSR1_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT]);
+
+ // This value is the image region dimension that each thread group of the FSR shader operates on
+ static const int threadGroupWorkRegionDim = 16;
+ // Both passes are dispatched over the display (output) size, not the render size.
+ int dispatchX = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.width, threadGroupWorkRegionDim);
+ int dispatchY = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.height, threadGroupWorkRegionDim);
+
+ // Sharpening needs both the per-dispatch request and the context-level RCAS flag.
+ const bool doSharpen = params->enableSharpening && (context->contextDescription.flags & FFX_FSR1_ENABLE_RCAS);
+
+ // Easu constants
+ Fsr1Constants easuConst = {};
+ ffxFsrPopulateEasuConstants(reinterpret_cast(&easuConst.const0),
+ reinterpret_cast(&easuConst.const1),
+ reinterpret_cast(&easuConst.const2),
+ reinterpret_cast(&easuConst.const3),
+ static_cast(params->renderSize.width), static_cast(params->renderSize.height),
+ static_cast(params->color.description.width), static_cast(params->color.description.height),
+ static_cast(context->contextDescription.displaySize.width),
+ static_cast(context->contextDescription.displaySize.height));
+ // sample[0] carries the masked HDR context flag (non-zero when HDR is enabled).
+ easuConst.sample[0] = context->contextDescription.flags & FFX_FSR1_ENABLE_HIGH_DYNAMIC_RANGE;
+ context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(
+ &context->contextDescription.backendInterface,
+ &easuConst,
+ sizeof(Fsr1Constants),
+ &context->constantBuffer);
+ // With sharpening the EASU_RCAS pipeline is used for the upscale pass, otherwise plain EASU.
+ scheduleDispatch(context, params, doSharpen ? &context->pipelineEASU_RCAS : &context->pipelineEASU, dispatchX, dispatchY);
+
+ if (doSharpen)
+ {
+ // Rcas constants
+ Fsr1Constants rcasConst = {};
+ // Linear remap of the caller's sharpness: 0.0 -> 2.0 and 1.0 -> 0.0.
+ const float sharpenessRemapped = (-2.0f * params->sharpness) + 2.0f;
+ FsrRcasCon(reinterpret_cast(&rcasConst.const0), sharpenessRemapped);
+ rcasConst.sample[0] = context->contextDescription.flags & FFX_FSR1_ENABLE_HIGH_DYNAMIC_RANGE;
+ context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(
+ &context->contextDescription.backendInterface,
+ &rcasConst,
+ sizeof(Fsr1Constants),
+ &context->constantBuffer);
+ scheduleDispatch(context, params, &context->pipelineRCAS, dispatchX, dispatchY);
+ }
+
+ // Execute all the work for the frame
+ context->contextDescription.backendInterface.fpExecuteGpuJobs(&context->contextDescription.backendInterface, commandList, context->effectContextId);
+
+ // Release dynamic resources
+ context->contextDescription.backendInterface.fpUnregisterResources(&context->contextDescription.backendInterface, commandList, context->effectContextId);
+
+ return FFX_OK;
+}
+
+// Initializes a private FSR1 context from a context description.
+//
+// Zeroes the context, copies the description, checks the backend SDK version,
+// creates the backend effect context, queries device capabilities, creates the
+// intermediate upscaled-color texture when FFX_FSR1_ENABLE_RCAS is set, and finally
+// creates the pipelines.
+//
+// Returns FFX_ERROR_INVALID_VERSION when the backend does not report SDK 1.1.4,
+// the failing backend call's error code on other failures, FFX_OK on success.
+//
+// NOTE(review): the error exits after fpCreateBackendContext succeeded do not call
+// fpDestroyBackendContext in this function; presumably the caller is expected to
+// destroy the context on failure - confirm.
+static FfxErrorCode fsr1Create(FfxFsr1Context_Private* context, const FfxFsr1ContextDescription* contextDescription)
+{
+ FFX_ASSERT(context);
+ FFX_ASSERT(contextDescription);
+
+ // Setup the data for implementation.
+ memset(context, 0, sizeof(FfxFsr1Context_Private));
+ context->device = contextDescription->backendInterface.device;
+
+ memcpy(&context->contextDescription, contextDescription, sizeof(FfxFsr1ContextDescription));
+
+ // Check version info - make sure we are linked with the right backend version
+ FfxVersionNumber version = context->contextDescription.backendInterface.fpGetSDKVersion(&context->contextDescription.backendInterface);
+ FFX_RETURN_ON_ERROR(version == FFX_SDK_MAKE_VERSION(1, 1, 4), FFX_ERROR_INVALID_VERSION);
+
+ // Setup constant buffer sizes.
+ context->constantBuffer.num32BitEntries = sizeof(Fsr1Constants) / sizeof(uint32_t);
+
+ // Create the context.
+ FfxErrorCode errorCode =
+ context->contextDescription.backendInterface.fpCreateBackendContext(&context->contextDescription.backendInterface, FFX_EFFECT_FSR1, nullptr, &context->effectContextId);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+ // Call out for device caps.
+ errorCode = context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &context->deviceCapabilities);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+ // Describe the intermediate upscale resource: display-sized, in the output format.
+ // It is only actually created below when RCAS is enabled.
+ const FfxInternalResourceDescription internalSurfaceDesc = {FFX_FSR1_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR,
+ L"FSR1_InternalUpscaledColor",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_UAV,
+ contextDescription->outputFormat,
+ contextDescription->displaySize.width,
+ contextDescription->displaySize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_ALIASABLE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}};
+
+ // Clear the SRV resources to NULL.
+ memset(context->srvResources, 0, sizeof(context->srvResources));
+
+ if (contextDescription->flags & FFX_FSR1_ENABLE_RCAS)
+ {
+ const FfxResourceDescription resourceDescription = {FFX_RESOURCE_TYPE_TEXTURE2D,
+ internalSurfaceDesc.format,
+ internalSurfaceDesc.width,
+ internalSurfaceDesc.height,
+ 1,
+ internalSurfaceDesc.mipCount,
+ internalSurfaceDesc.flags,
+ internalSurfaceDesc.usage};
+
+ const FfxCreateResourceDescription createResourceDescription = {FFX_HEAP_TYPE_DEFAULT,
+ resourceDescription,
+ FFX_RESOURCE_STATE_UNORDERED_ACCESS,
+ internalSurfaceDesc.name,
+ internalSurfaceDesc.id,
+ internalSurfaceDesc.initData};
+
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreateResource(&context->contextDescription.backendInterface, &createResourceDescription, context->effectContextId, &context->srvResources[internalSurfaceDesc.id]));
+ }
+
+ // And copy resources to the uavResources list, so both tables refer to the same internal resource
+ memcpy(context->uavResources, context->srvResources, sizeof(context->srvResources));
+
+ // Create shaders on initialize.
+ errorCode = createPipelineStates(context);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+ return FFX_OK;
+}
+
+// Tears down a private FSR1 context: releases the three pipelines, clears the SRV
+// slots of the externally owned resources, releases the internal upscaled-color
+// resource and destroys the backend effect context. Always returns FFX_OK.
+static FfxErrorCode fsr1Release(FfxFsr1Context_Private* context)
+{
+    FFX_ASSERT(context);
+
+    auto* backend = &context->contextDescription.backendInterface;
+
+    // Pipelines first.
+    ffxSafeReleasePipeline(backend, &context->pipelineEASU, context->effectContextId);
+    ffxSafeReleasePipeline(backend, &context->pipelineEASU_RCAS, context->effectContextId);
+    ffxSafeReleasePipeline(backend, &context->pipelineRCAS, context->effectContextId);
+
+    // Input color and upscaled output were not created internally: just forget them.
+    context->srvResources[FFX_FSR1_RESOURCE_IDENTIFIER_INPUT_COLOR]     = { FFX_FSR1_RESOURCE_IDENTIFIER_NULL };
+    context->srvResources[FFX_FSR1_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT] = { FFX_FSR1_RESOURCE_IDENTIFIER_NULL };
+
+    // The intermediate upscaled color is owned by this context and must be released.
+    ffxSafeReleaseResource(backend, context->srvResources[FFX_FSR1_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR], context->effectContextId);
+
+    // Finally drop the backend effect context itself.
+    backend->fpDestroyBackendContext(backend, context->effectContextId);
+
+    return FFX_OK;
+}
+
+// Creates an FSR1 context inside caller-provided storage.
+//
+// context             Storage for the context; zeroed before use.
+// contextDescription  Creation parameters, including the backend interface.
+//
+// Returns FFX_ERROR_INVALID_POINTER when either pointer is null,
+// FFX_ERROR_INCOMPLETE_INTERFACE when a required backend callback is missing or a
+// scratch buffer is declared without a size, otherwise the result of fsr1Create.
+FfxErrorCode ffxFsr1ContextCreate(FfxFsr1Context* context, const FfxFsr1ContextDescription* contextDescription)
+{
+    // The context pointer must be validated before it is written to; zeroing it
+    // first would dereference a null pointer instead of reporting the error.
+    FFX_RETURN_ON_ERROR(
+        context,
+        FFX_ERROR_INVALID_POINTER);
+
+    // Zero context memory (done before the remaining checks so that a failed
+    // creation still leaves the caller's storage cleared, as before).
+    memset(context, 0, sizeof(FfxFsr1Context));
+
+    FFX_RETURN_ON_ERROR(
+        contextDescription,
+        FFX_ERROR_INVALID_POINTER);
+
+    // Validate that all callbacks are set for the interface
+    FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE);
+    FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE);
+    FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+    FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+
+    // If a scratch buffer is declared, then we must have a size
+    if (contextDescription->backendInterface.scratchBuffer) {
+
+        FFX_RETURN_ON_ERROR(contextDescription->backendInterface.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE);
+    }
+
+    // Ensure the context is large enough for the internal context.
+    FFX_STATIC_ASSERT(sizeof(FfxFsr1Context) >= sizeof(FfxFsr1Context_Private));
+
+    // create the context.
+    FfxFsr1Context_Private* contextPrivate = (FfxFsr1Context_Private*)(context);
+    const FfxErrorCode errorCode = fsr1Create(contextPrivate, contextDescription);
+
+    return errorCode;
+}
+
+// Queries the backend for the GPU memory used by this FSR1 context.
+//
+// Returns FFX_ERROR_INVALID_POINTER for a null argument, FFX_ERROR_NULL_DEVICE when
+// the context has no device, the backend's error code if the query fails, and
+// FFX_OK once vramUsage has been handed to the backend successfully.
+FFX_API FfxErrorCode ffxFsr1ContextGetGpuMemoryUsage(FfxFsr1Context* context, FfxEffectMemoryUsage* vramUsage)
+{
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER);
+
+    FfxFsr1Context_Private* contextPrivate = (FfxFsr1Context_Private*)(context);
+    FFX_RETURN_ON_ERROR(contextPrivate->device, FFX_ERROR_NULL_DEVICE);
+
+    auto* backend = &contextPrivate->contextDescription.backendInterface;
+    const FfxErrorCode queryResult = backend->fpGetEffectGpuMemoryUsage(backend, contextPrivate->effectContextId, vramUsage);
+    FFX_RETURN_ON_ERROR(queryResult == FFX_OK, queryResult);
+
+    return FFX_OK;
+}
+
+// Destroys an FSR1 context.
+//
+// Returns FFX_ERROR_INVALID_POINTER when context is null, otherwise the result
+// of releasing the private context.
+FfxErrorCode ffxFsr1ContextDestroy(FfxFsr1Context* context)
+{
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+
+    // The public context is the storage of the private implementation.
+    return fsr1Release((FfxFsr1Context_Private*)(context));
+}
+
+// Validates a dispatch request and runs the FSR1 passes for it.
+//
+// Returns FFX_ERROR_INVALID_POINTER for a null argument, FFX_ERROR_OUT_OF_RANGE when
+// the requested render size exceeds the context's maxRenderSize in either dimension,
+// FFX_ERROR_NULL_DEVICE when the context has no device, otherwise the dispatch result.
+FfxErrorCode ffxFsr1ContextDispatch(FfxFsr1Context* context, const FfxFsr1DispatchDescription* dispatchDescription)
+{
+    // Reject null arguments up front.
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(dispatchDescription, FFX_ERROR_INVALID_POINTER);
+
+    FfxFsr1Context_Private* contextPrivate = (FfxFsr1Context_Private*)(context);
+
+    // The render size must fit inside the maximum the context was created for.
+    const auto& maxRenderSize = contextPrivate->contextDescription.maxRenderSize;
+    const auto& renderSize    = dispatchDescription->renderSize;
+    FFX_RETURN_ON_ERROR(renderSize.width <= maxRenderSize.width, FFX_ERROR_OUT_OF_RANGE);
+    FFX_RETURN_ON_ERROR(renderSize.height <= maxRenderSize.height, FFX_ERROR_OUT_OF_RANGE);
+    FFX_RETURN_ON_ERROR(contextPrivate->device, FFX_ERROR_NULL_DEVICE);
+
+    // Dispatch the FSR1 passes.
+    return fsr1Dispatch(contextPrivate, dispatchDescription);
+}
+
+// Returns the per-dimension upscale ratio for a quality mode:
+// 1.3 (ultra quality), 1.5 (quality), 1.7 (balanced), 2.0 (performance),
+// or 0.0 for any other value.
+float ffxFsr1GetUpscaleRatioFromQualityMode(FfxFsr1QualityMode qualityMode)
+{
+    if (qualityMode == FFX_FSR1_QUALITY_MODE_ULTRA_QUALITY)
+        return 1.3f;
+    if (qualityMode == FFX_FSR1_QUALITY_MODE_QUALITY)
+        return 1.5f;
+    if (qualityMode == FFX_FSR1_QUALITY_MODE_BALANCED)
+        return 1.7f;
+    if (qualityMode == FFX_FSR1_QUALITY_MODE_PERFORMANCE)
+        return 2.0f;
+
+    // Unknown quality mode.
+    return 0.0f;
+}
+
+// Computes the render resolution for a display resolution and quality mode by
+// dividing each display dimension by the mode's upscale ratio (truncating).
+//
+// Returns FFX_ERROR_INVALID_POINTER when an output pointer is null,
+// FFX_ERROR_INVALID_ENUM when qualityMode lies outside
+// [FFX_FSR1_QUALITY_MODE_ULTRA_QUALITY, FFX_FSR1_QUALITY_MODE_PERFORMANCE],
+// FFX_OK otherwise.
+FfxErrorCode ffxFsr1GetRenderResolutionFromQualityMode(
+    uint32_t* renderWidth,
+    uint32_t* renderHeight,
+    uint32_t displayWidth,
+    uint32_t displayHeight,
+    FfxFsr1QualityMode qualityMode)
+{
+    FFX_RETURN_ON_ERROR(renderWidth, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(renderHeight, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(
+        FFX_FSR1_QUALITY_MODE_ULTRA_QUALITY <= qualityMode && qualityMode <= FFX_FSR1_QUALITY_MODE_PERFORMANCE,
+        FFX_ERROR_INVALID_ENUM);
+
+    // Same predefined ratio in both dimensions.
+    const float upscaleRatio = ffxFsr1GetUpscaleRatioFromQualityMode(qualityMode);
+    *renderWidth  = static_cast<uint32_t>(static_cast<float>(displayWidth) / upscaleRatio);
+    *renderHeight = static_cast<uint32_t>(static_cast<float>(displayHeight) / upscaleRatio);
+
+    return FFX_OK;
+}
+
+// Returns the FSR1 effect version packed from the FFX_FSR1_VERSION_* macros.
+FFX_API FfxVersionNumber ffxFsr1GetEffectVersion()
+{
+    const FfxVersionNumber effectVersion =
+        FFX_SDK_MAKE_VERSION(FFX_FSR1_VERSION_MAJOR, FFX_FSR1_VERSION_MINOR, FFX_FSR1_VERSION_PATCH);
+    return effectVersion;
+}
diff --git a/thirdparty/amd-ffx/ffx_fsr1.h b/thirdparty/amd-ffx/ffx_fsr1.h
new file mode 100644
index 000000000000..5201884ac9b7
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_fsr1.h
@@ -0,0 +1,302 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+/// @defgroup ffxFsr1 FidelityFX FSR1
+/// FidelityFX Super Resolution 1 runtime library
+///
+/// @ingroup SDKComponents
+
+#pragma once
+
+/// Include the interface for the backend of the FSR 1.0 API.
+///
+/// @ingroup ffxFsr1
+#include "ffx_interface.h"
+
+/// FidelityFX Super Resolution 1.0 major version.
+///
+/// @ingroup ffxFsr1
+#define FFX_FSR1_VERSION_MAJOR (1)
+
+/// FidelityFX Super Resolution 1.0 minor version.
+///
+/// @ingroup ffxFsr1
+#define FFX_FSR1_VERSION_MINOR (2)
+
+/// FidelityFX Super Resolution 1.0 patch version.
+///
+/// @ingroup ffxFsr1
+#define FFX_FSR1_VERSION_PATCH (0)
+
+/// FidelityFX Super Resolution 1.0 context count
+///
+/// Defines the number of internal effect contexts required by FSR1
+///
+/// @ingroup ffxFsr1
+#define FFX_FSR1_CONTEXT_COUNT 2
+
+/// The size of the context specified in 32bit values.
+///
+/// @ingroup ffxFsr1
+
+// GODOT BEGINS
+// On non-Windows platforms `wchar_t` is 32 bits rather than 16 bits,
+// so we have to increase the size of the context by 2x.
+#define FFX_FSR1_CONTEXT_SIZE (52408)
+// GODOT ENDS
+
+#if defined(__cplusplus)
+extern "C" {
+#endif // #if defined(__cplusplus)
+
+/// An enumeration of all the passes which constitute the FSR1 algorithm.
+///
+/// FSR1 is implemented as a composite of several compute passes each
+/// computing a key part of the final result. Each call to the
+/// FfxFsr1ScheduleGpuJobFunc callback function will
+/// correspond to a single pass included in FfxFsr1Pass. For a
+/// more comprehensive description of each pass, please refer to the FSR1
+/// reference documentation.
+///
+/// @ingroup ffxFsr1
+typedef enum FfxFsr1Pass
+{
+ FFX_FSR1_PASS_EASU = 0, ///< A pass which upscales the color buffer using EASU.
+ FFX_FSR1_PASS_EASU_RCAS = 1, ///< A pass which upscales the color buffer in preparation for RCAS.
+ FFX_FSR1_PASS_RCAS = 2, ///< A pass which performs RCAS sharpening on the upscaled image.
+
+ FFX_FSR1_PASS_COUNT ///< The number of FSR1 passes defined in this enumeration.
+} FfxFsr1Pass;
+
+/// An enumeration of all the quality modes supported by FidelityFX Super
+/// Resolution 1 upscaling.
+///
+/// In order to provide a consistent user experience across multiple
+/// applications which implement FSR1, it is strongly recommended that the
+/// following preset scaling factors are made available through your
+/// application's user interface.
+///
+/// If your application does not expose the notion of preset scaling factors
+/// for upscaling algorithms (perhaps instead implementing a fixed ratio which
+/// is immutable) or implements a more dynamic scaling scheme (such as
+/// dynamic resolution scaling), then there is no need to use these presets.
+///
+/// @ingroup ffxFsr1
+typedef enum FfxFsr1QualityMode {
+
+ FFX_FSR1_QUALITY_MODE_ULTRA_QUALITY = 0, ///< Perform upscaling with a per-dimension upscaling ratio of 1.3x.
+ FFX_FSR1_QUALITY_MODE_QUALITY = 1, ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x.
+ FFX_FSR1_QUALITY_MODE_BALANCED = 2, ///< Perform upscaling with a per-dimension upscaling ratio of 1.7x.
+ FFX_FSR1_QUALITY_MODE_PERFORMANCE = 3 ///< Perform upscaling with a per-dimension upscaling ratio of 2.0x.
+} FfxFsr1QualityMode;
+
+/// An enumeration of bit flags used when creating a FfxFsr1Context.
+/// A collection of these bits is stored in FfxFsr1ContextDescription.flags.
+///
+/// @ingroup ffxFsr1
+typedef enum FfxFsr1InitializationFlagBits {
+
+ FFX_FSR1_ENABLE_RCAS = (1 << 0), ///< A bit indicating if we should use RCAS.
+ FFX_FSR1_RCAS_PASSTHROUGH_ALPHA = (1 << 1), ///< A bit indicating if we should use passthrough alpha during RCAS.
+ FFX_FSR1_RCAS_DENOISE = (1 << 2), ///< A bit indicating if denoising is invoked during RCAS.
+ FFX_FSR1_ENABLE_HIGH_DYNAMIC_RANGE = (1 << 3), ///< A bit indicating if the input color data provided is using a high-dynamic range.
+ FFX_FSR1_ENABLE_SRGB_CONVERSIONS = (1 << 4), ///< A bit indicating that input/output resources require gamma conversions.
+
+} FfxFsr1InitializationFlagBits;
+
+/// A structure encapsulating the parameters required to initialize FidelityFX
+/// Super Resolution 1.0.
+///
+/// @ingroup ffxFsr1
+typedef struct FfxFsr1ContextDescription {
+
+ uint32_t flags; ///< A collection of FfxFsr1InitializationFlagBits.
+ FfxSurfaceFormat outputFormat; ///< Format of the output target used for creation of the internal upscale resource.
+ FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at; a dispatch whose renderSize exceeds it fails with FFX_ERROR_OUT_OF_RANGE.
+ FfxDimensions2D displaySize; ///< The size of the presentation resolution targeted by the upscaling process.
+ FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FSR1.
+} FfxFsr1ContextDescription;
+
+/// A structure encapsulating the parameters for dispatching the various passes
+/// of FidelityFX Super Resolution 1.0.
+///
+/// @ingroup ffxFsr1
+typedef struct FfxFsr1DispatchDescription {
+
+ FfxCommandList commandList; ///< The FfxCommandList to record FSR1 rendering commands into.
+ FfxResource color; ///< A FfxResource containing the color buffer for the current frame (at render resolution).
+ FfxResource output; ///< A FfxResource containing the output color buffer for the current frame (at presentation resolution).
+ FfxDimensions2D renderSize; ///< The resolution that was used for rendering the input resource; must not exceed FfxFsr1ContextDescription.maxRenderSize.
+ bool enableSharpening; ///< Enable an additional sharpening pass.
+ float sharpness; ///< The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness.
+} FfxFsr1DispatchDescription;
+
+/// A structure encapsulating the FidelityFX Super Resolution 1.0 context.
+///
+/// This sets up an object which contains all persistent internal data and
+/// resources that are required by FSR1.
+///
+/// The FfxFsr1Context object should have a lifetime matching
+/// your use of FSR1. Before destroying the FSR1 context, care should be taken
+/// to ensure the GPU is not accessing the resources created or used by FSR1.
+/// It is therefore recommended that the GPU is idle before destroying the
+/// FSR1 context.
+///
+/// @ingroup ffxFsr1
+typedef struct FfxFsr1Context {
+
+ uint32_t data[FFX_FSR1_CONTEXT_SIZE]; ///< An opaque array of FFX_FSR1_CONTEXT_SIZE uint32_t values which contains the data for the context.
+} FfxFsr1Context;
+
+
+/// Create a FidelityFX Super Resolution 1.0 context from the parameters
+/// programmed to the FfxFsr1ContextDescription structure.
+///
+/// The context structure is the main object used to interact with the Super
+/// Resolution 1.0 API, and is responsible for the management of the internal resources
+/// used by the FSR1 algorithm. When this API is called, multiple calls
+/// will be made via the pointers contained in the backendInterface
+/// structure. These callbacks will attempt to retrieve the device capabilities,
+/// and create the internal resources and pipelines required by FSR1's
+/// frame-to-frame function. Depending on the precise configuration used when
+/// creating the FfxFsr1Context a different set of resources and
+/// pipelines might be requested via the callback functions.
+///
+/// The FfxFsr1Context should be destroyed when use of it is
+/// completed, typically when an application is unloaded or FSR1
+/// upscaling is disabled by a user. To destroy the FSR1 context you
+/// should call ffxFsr1ContextDestroy.
+///
+/// @param [out] pContext A pointer to a FfxFsr1Context structure to populate.
+/// @param [in] pContextDescription A pointer to a FfxFsr1ContextDescription structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either pContext or pContextDescription was NULL.
+/// @retval
+/// FFX_ERROR_INCOMPLETE_INTERFACE The operation failed because the FfxFsr1ContextDescription.backendInterface was not fully specified.
+/// @retval
+/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
+///
+/// @ingroup ffxFsr1
+FFX_API FfxErrorCode ffxFsr1ContextCreate(FfxFsr1Context* pContext, const FfxFsr1ContextDescription* pContextDescription);
+
+/// Get GPU memory usage of the FidelityFX Super Resolution 1.0 context.
+///
+/// @param [in] pContext A pointer to a FfxFsr1Context structure.
+/// @param [out] pVramUsage A pointer to a FfxEffectMemoryUsage structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either pContext or pVramUsage was NULL.
+///
+/// @ingroup ffxFsr1
+FFX_API FfxErrorCode ffxFsr1ContextGetGpuMemoryUsage(FfxFsr1Context* pContext, FfxEffectMemoryUsage* pVramUsage);
+
+/// Dispatch the passes of FidelityFX Super Resolution 1.0 described by
+/// pDispatchDescription.
+///
+/// @param [in] pContext A pointer to a FfxFsr1Context structure.
+/// @param [in] pDispatchDescription A pointer to a FfxFsr1DispatchDescription structure.
+///
+/// @retval FFX_OK The operation completed successfully.
+/// @retval FFX_ERROR_CODE_NULL_POINTER The operation failed because either pContext or pDispatchDescription was NULL.
+/// @retval FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
+///
+/// @ingroup ffxFsr1
+FFX_API FfxErrorCode ffxFsr1ContextDispatch(FfxFsr1Context* pContext, const FfxFsr1DispatchDescription* pDispatchDescription);
+
+/// Destroy the FidelityFX FSR 1 context.
+///
+/// @param [out] pContext A pointer to a FfxFsr1Context structure to destroy.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because pContext was NULL.
+///
+/// @ingroup ffxFsr1
+FFX_API FfxErrorCode ffxFsr1ContextDestroy(FfxFsr1Context* pContext);
+
+/// Get the upscale ratio from the quality mode.
+///
+/// The following table enumerates the mapping of the quality modes to
+/// per-dimension scaling ratios.
+///
+/// Quality preset | Scale factor
+/// ----------------------------------------------------- | -------------
+/// FFX_FSR1_QUALITY_MODE_ULTRA_QUALITY | 1.3x
+/// FFX_FSR1_QUALITY_MODE_QUALITY | 1.5x
+/// FFX_FSR1_QUALITY_MODE_BALANCED | 1.7x
+/// FFX_FSR1_QUALITY_MODE_PERFORMANCE | 2.0x
+///
+/// Passing an invalid qualityMode will return 0.0f.
+///
+/// @param [in] qualityMode The quality mode preset.
+///
+/// @returns
+/// The per-dimension upscaling ratio for
+/// qualityMode according to the table above.
+///
+/// @ingroup ffxFsr1
+FFX_API float ffxFsr1GetUpscaleRatioFromQualityMode(FfxFsr1QualityMode qualityMode);
+
+/// A helper function to calculate the rendering resolution from a target
+/// resolution and desired quality level.
+///
+/// This function divides each display dimension by the ratio returned by
+/// ffxFsr1GetUpscaleRatioFromQualityMode and truncates the result to an integer.
+///
+/// @param [out] pRenderWidth A pointer to a uint32_t which will hold the calculated render resolution width.
+/// @param [out] pRenderHeight A pointer to a uint32_t which will hold the calculated render resolution height.
+/// @param [in] displayWidth The target display resolution width.
+/// @param [in] displayHeight The target display resolution height.
+/// @param [in] qualityMode The desired quality mode for FSR1 upscaling.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_INVALID_POINTER Either pRenderWidth or pRenderHeight was NULL.
+/// @retval
+/// FFX_ERROR_INVALID_ENUM An invalid quality mode was specified.
+///
+/// @ingroup ffxFsr1
+FFX_API FfxErrorCode ffxFsr1GetRenderResolutionFromQualityMode(
+ uint32_t* pRenderWidth,
+ uint32_t* pRenderHeight,
+ uint32_t displayWidth,
+ uint32_t displayHeight,
+ FfxFsr1QualityMode qualityMode);
+
+/// Queries the FSR1 effect version number.
+///
+/// @returns
+/// The effect version built from FFX_FSR1_VERSION_MAJOR, FFX_FSR1_VERSION_MINOR and FFX_FSR1_VERSION_PATCH.
+///
+/// @ingroup ffxFsr1
+FFX_API FfxVersionNumber ffxFsr1GetEffectVersion();
+
+#if defined(__cplusplus)
+}
+#endif // #if defined(__cplusplus)
diff --git a/thirdparty/amd-ffx/ffx_fsr1_private.h b/thirdparty/amd-ffx/ffx_fsr1_private.h
new file mode 100644
index 000000000000..1c9b23ed4fd7
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_fsr1_private.h
@@ -0,0 +1,75 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+#include "gpu/fsr1/ffx_fsr1_resources.h"
+
+/// An enumeration of all the permutations that can be passed to the FSR1 algorithm.
+///
+/// FSR1 features are organized through a set of pre-defined compile
+/// permutation options that need to be specified. Which shader blob
+/// is returned for pipeline creation will be determined by what combination
+/// of shader permutations is enabled.
+///
+typedef enum Fs1ShaderPermutationOptions
+{
+ FSR1_SHADER_PERMUTATION_APPLY_RCAS = (1 << 0), ///< RCAS will be applied, outputs to correct intermediary target.
+ FSR1_SHADER_PERMUTATION_RCAS_PASSTHROUGH_ALPHA = (1 << 1), ///< Compile RCAS to pass through the input alpha value.
+ FSR1_SHADER_PERMUTATION_SRGB_CONVERSIONS = (1 << 2), ///< Handle necessary conversions for SRGB formats (de-gamma in and gamma out).
+ FSR1_SHADER_PERMUTATION_FORCE_WAVE64 = (1 << 3), ///< Doesn't map to a define; selects a different table.
+ FSR1_SHADER_PERMUTATION_ALLOW_FP16 = (1 << 4), ///< Enables fast math computations where possible.
+} Fs1ShaderPermutationOptions;
+
+// Constants for FSR1 dispatches (five FfxUInt32x4 values). Must be kept in sync with cbFSR1 in ffx_fsr1_callbacks_hlsl.h.
+typedef struct Fsr1Constants
+{
+ FfxUInt32x4 const0;
+ FfxUInt32x4 const1;
+ FfxUInt32x4 const2;
+ FfxUInt32x4 const3;
+ FfxUInt32x4 sample;
+} Fsr1Constants;
+
+struct FfxFsr1ContextDescription;
+struct FfxDeviceCapabilities;
+struct FfxPipelineState;
+struct FfxResource;
+
+// FfxFsr1Context_Private
+// The private implementation of the FSR1 context. NOTE(review): presumably overlaid on FfxFsr1Context::data, so it must fit in FFX_FSR1_CONTEXT_SIZE uint32_t values - confirm.
+typedef struct FfxFsr1Context_Private {
+
+ FfxFsr1ContextDescription contextDescription; // Its maxRenderSize bounds the renderSize accepted at dispatch.
+ FfxUInt32 effectContextId;
+ Fsr1Constants constants;
+ FfxDevice device; // Must be non-null at dispatch; otherwise FFX_ERROR_NULL_DEVICE is returned.
+ FfxDeviceCapabilities deviceCapabilities;
+ FfxConstantBuffer constantBuffer;
+
+ FfxPipelineState pipelineEASU; // NOTE(review): the three pipelines presumably map to the FfxFsr1Pass entries - confirm.
+ FfxPipelineState pipelineEASU_RCAS;
+ FfxPipelineState pipelineRCAS;
+
+ FfxResourceInternal srvResources[FFX_FSR1_RESOURCE_IDENTIFIER_COUNT];
+ FfxResourceInternal uavResources[FFX_FSR1_RESOURCE_IDENTIFIER_COUNT];
+
+} FfxFsr1Context_Private;
diff --git a/thirdparty/amd-fsr2/ffx_fsr2.cpp b/thirdparty/amd-ffx/ffx_fsr2.cpp
similarity index 50%
rename from thirdparty/amd-fsr2/ffx_fsr2.cpp
rename to thirdparty/amd-ffx/ffx_fsr2.cpp
index ec571b9cd27d..bae2a6b1e232 100644
--- a/thirdparty/amd-fsr2/ffx_fsr2.cpp
+++ b/thirdparty/amd-ffx/ffx_fsr2.cpp
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -23,27 +24,26 @@
#include // for fabs, abs, sinf, sqrt, etc.
#include // for memset
#include // for FLT_EPSILON
-#include "ffx_fsr2.h"
-#define FFX_CPU
-#include "shaders/ffx_core.h"
-#include "shaders/ffx_fsr1.h"
-#include "shaders/ffx_spd.h"
-#include "shaders/ffx_fsr2_callbacks_hlsl.h"
-
-#include "ffx_fsr2_maximum_bias.h"
#ifdef __clang__
-#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wsign-compare"
+#pragma clang diagnostic ignored "-Wunused-function"
#endif
-#ifndef _countof
-#define _countof(array) (sizeof(array) / sizeof(array[0]))
+#ifdef _MSC_VER
+#pragma warning(disable : 4505)
#endif
-#ifndef _MSC_VER
-#include
-#define wcscpy_s wcscpy
-#endif
+#include "ffx_fsr2.h"
+#define FFX_CPU
+#include "gpu/ffx_core.h"
+#include "gpu/fsr1/ffx_fsr1.h"
+#include "gpu/spd/ffx_spd.h"
+#include "gpu/fsr2/ffx_fsr2_callbacks_hlsl.h"
+#include "gpu/fsr2/ffx_fsr2_common.h"
+#include "ffx_object_management.h"
+
+#include "ffx_fsr2_maximum_bias.h"
// max queued frames for descriptor management
static const uint32_t FSR2_MAX_QUEUED_FRAMES = 16;
@@ -57,7 +57,7 @@ typedef struct ResourceBinding
wchar_t name[64];
}ResourceBinding;
-static const ResourceBinding srvResourceBindingTable[] =
+static const ResourceBinding srvTextureBindingTable[] =
{
{FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR, L"r_input_color_jittered"},
{FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY, L"r_input_opaque_only"},
@@ -88,7 +88,7 @@ static const ResourceBinding srvResourceBindingTable[] =
{FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR, L"r_input_prev_color_post_alpha"},
};
-static const ResourceBinding uavResourceBindingTable[] =
+static const ResourceBinding uavTextureBindingTable[] =
{
{FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"rw_reconstructed_previous_nearest_depth"},
{FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"rw_dilated_motion_vectors"},
@@ -111,7 +111,7 @@ static const ResourceBinding uavResourceBindingTable[] =
{FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR, L"rw_output_prev_color_post_alpha"},
};
-static const ResourceBinding cbResourceBindingTable[] =
+static const ResourceBinding constantBufferBindingTable[] =
{
{FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2, L"cbFSR2"},
{FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD, L"cbSPD"},
@@ -120,14 +120,14 @@ static const ResourceBinding cbResourceBindingTable[] =
};
// Broad structure of the root signature.
-typedef enum Fsr2RootSignatureLayout {
+/*typedef enum Fsr2RootSignatureLayout {
FSR2_ROOT_SIGNATURE_LAYOUT_UAVS,
FSR2_ROOT_SIGNATURE_LAYOUT_SRVS,
FSR2_ROOT_SIGNATURE_LAYOUT_CONSTANTS,
FSR2_ROOT_SIGNATURE_LAYOUT_CONSTANTS_REGISTER_1,
FSR2_ROOT_SIGNATURE_LAYOUT_PARAMETER_COUNT
-} Fsr2RootSignatureLayout;
+} Fsr2RootSignatureLayout;*/
typedef struct Fsr2RcasConstants {
@@ -167,27 +167,6 @@ typedef union Fsr2SecondaryUnion {
Fsr2GenerateReactiveConstants2 autogenReactive;
} Fsr2SecondaryUnion;
-typedef struct Fsr2ResourceDescription {
-
- uint32_t id;
- const wchar_t* name;
- FfxResourceUsage usage;
- FfxSurfaceFormat format;
- uint32_t width;
- uint32_t height;
- uint32_t mipCount;
- FfxResourceFlags flags;
- uint32_t initDataSize;
- void* initData;
-} Fsr2ResourceDescription;
-
-FfxConstantBuffer globalFsr2ConstantBuffers[4] = {
- { sizeof(Fsr2Constants) / sizeof(uint32_t) },
- { sizeof(Fsr2SpdConstants) / sizeof(uint32_t) },
- { sizeof(Fsr2RcasConstants) / sizeof(uint32_t) },
- { sizeof(Fsr2GenerateReactiveConstants) / sizeof(uint32_t) }
-};
-
// Lanczos
static float lanczos2(float value)
{
@@ -213,77 +192,77 @@ static void fsr2DebugCheckDispatch(FfxFsr2Context_Private* context, const FfxFsr
{
if (params->commandList == nullptr)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"commandList is null");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"commandList is null");
}
if (params->color.resource == nullptr)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"color resource is null");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"color resource is null");
}
if (params->depth.resource == nullptr)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"depth resource is null");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"depth resource is null");
}
if (params->motionVectors.resource == nullptr)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"motionVectors resource is null");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"motionVectors resource is null");
}
if (params->exposure.resource != nullptr)
{
if ((context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) == FFX_FSR2_ENABLE_AUTO_EXPOSURE)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"exposure resource provided, however auto exposure flag is present");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"exposure resource provided, however auto exposure flag is present");
}
}
if (params->output.resource == nullptr)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"output resource is null");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"output resource is null");
}
if (fabs(params->jitterOffset.x) > 1.0f || fabs(params->jitterOffset.y) > 1.0f)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"jitterOffset contains value outside of expected range [-1.0, 1.0]");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"jitterOffset contains value outside of expected range [-1.0, 1.0]");
}
if ((params->motionVectorScale.x > (float)context->contextDescription.maxRenderSize.width) ||
(params->motionVectorScale.y > (float)context->contextDescription.maxRenderSize.height))
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"motionVectorScale contains scale value greater than maxRenderSize");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"motionVectorScale contains scale value greater than maxRenderSize");
}
if ((params->motionVectorScale.x == 0.0f) ||
(params->motionVectorScale.y == 0.0f))
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"motionVectorScale contains zero scale value");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"motionVectorScale contains zero scale value");
}
if ((params->renderSize.width > context->contextDescription.maxRenderSize.width) ||
(params->renderSize.height > context->contextDescription.maxRenderSize.height))
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"renderSize is greater than context maxRenderSize");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"renderSize is greater than context maxRenderSize");
}
if ((params->renderSize.width == 0) ||
(params->renderSize.height == 0))
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"renderSize contains zero dimension");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"renderSize contains zero dimension");
}
if (params->sharpness < 0.0f || params->sharpness > 1.0f)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"sharpness contains value outside of expected range [0.0, 1.0]");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"sharpness contains value outside of expected range [0.0, 1.0]");
}
if (params->frameTimeDelta < 1.0f)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"frameTimeDelta is less than 1.0f - this value should be milliseconds (~16.6f for 60fps)");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"frameTimeDelta is less than 1.0f - this value should be milliseconds (~16.6f for 60fps)");
}
if (params->preExposure == 0.0f)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"preExposure provided as 0.0f which is invalid");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"preExposure provided as 0.0f which is invalid");
}
bool infiniteDepth = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INFINITE) == FFX_FSR2_ENABLE_DEPTH_INFINITE;
@@ -293,20 +272,20 @@ static void fsr2DebugCheckDispatch(FfxFsr2Context_Private* context, const FfxFsr
{
if (params->cameraNear < params->cameraFar)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
L"FFX_FSR2_ENABLE_DEPTH_INVERTED flag is present yet cameraNear is less than cameraFar");
}
if (infiniteDepth)
{
if (params->cameraNear != FLT_MAX)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, yet cameraNear != FLT_MAX");
}
}
if (params->cameraFar < 0.075f)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, cameraFar value is very low which may result in depth separation artefacting");
}
}
@@ -314,117 +293,184 @@ static void fsr2DebugCheckDispatch(FfxFsr2Context_Private* context, const FfxFsr
{
if (params->cameraNear > params->cameraFar)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
L"cameraNear is greater than cameraFar in non-inverted-depth context");
}
if (infiniteDepth)
{
if (params->cameraFar != FLT_MAX)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, yet cameraFar != FLT_MAX");
}
}
if (params->cameraNear < 0.075f)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, cameraNear value is very low which may result in depth separation artefacting");
}
}
if (params->cameraFovAngleVertical <= 0.0f)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is 0.0f - this value should be > 0.0f");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is 0.0f - this value should be > 0.0f");
}
if (params->cameraFovAngleVertical > FFX_PI)
{
- context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is greater than 180 degrees/PI");
+ FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is greater than 180 degrees/PI");
}
}
static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
{
- for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvCount; ++srvIndex)
+ for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvTextureCount; ++srvIndex)
{
int32_t mapIndex = 0;
- for (mapIndex = 0; mapIndex < _countof(srvResourceBindingTable); ++mapIndex)
+ for (mapIndex = 0; mapIndex < _countof(srvTextureBindingTable); ++mapIndex)
{
- if (0 == wcscmp(srvResourceBindingTable[mapIndex].name, inoutPipeline->srvResourceBindings[srvIndex].name))
+ if (0 == wcscmp(srvTextureBindingTable[mapIndex].name, inoutPipeline->srvTextureBindings[srvIndex].name))
break;
}
- if (mapIndex == _countof(srvResourceBindingTable))
+ if (mapIndex == _countof(srvTextureBindingTable))
return FFX_ERROR_INVALID_ARGUMENT;
- inoutPipeline->srvResourceBindings[srvIndex].resourceIdentifier = srvResourceBindingTable[mapIndex].index;
+ inoutPipeline->srvTextureBindings[srvIndex].resourceIdentifier = srvTextureBindingTable[mapIndex].index;
}
- for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavCount; ++uavIndex)
+ for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavTextureCount; ++uavIndex)
{
int32_t mapIndex = 0;
- for (mapIndex = 0; mapIndex < _countof(uavResourceBindingTable); ++mapIndex)
+ for (mapIndex = 0; mapIndex < _countof(uavTextureBindingTable); ++mapIndex)
{
- if (0 == wcscmp(uavResourceBindingTable[mapIndex].name, inoutPipeline->uavResourceBindings[uavIndex].name))
+ if (0 == wcscmp(uavTextureBindingTable[mapIndex].name, inoutPipeline->uavTextureBindings[uavIndex].name))
break;
}
- if (mapIndex == _countof(uavResourceBindingTable))
+ if (mapIndex == _countof(uavTextureBindingTable))
return FFX_ERROR_INVALID_ARGUMENT;
- inoutPipeline->uavResourceBindings[uavIndex].resourceIdentifier = uavResourceBindingTable[mapIndex].index;
+ inoutPipeline->uavTextureBindings[uavIndex].resourceIdentifier = uavTextureBindingTable[mapIndex].index;
}
for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex)
{
int32_t mapIndex = 0;
- for (mapIndex = 0; mapIndex < _countof(cbResourceBindingTable); ++mapIndex)
+ for (mapIndex = 0; mapIndex < _countof(constantBufferBindingTable); ++mapIndex)
{
- if (0 == wcscmp(cbResourceBindingTable[mapIndex].name, inoutPipeline->cbResourceBindings[cbIndex].name))
+ if (0 == wcscmp(constantBufferBindingTable[mapIndex].name, inoutPipeline->constantBufferBindings[cbIndex].name))
break;
}
- if (mapIndex == _countof(cbResourceBindingTable))
+ if (mapIndex == _countof(constantBufferBindingTable))
return FFX_ERROR_INVALID_ARGUMENT;
- inoutPipeline->cbResourceBindings[cbIndex].resourceIdentifier = cbResourceBindingTable[mapIndex].index;
+ inoutPipeline->constantBufferBindings[cbIndex].resourceIdentifier = constantBufferBindingTable[mapIndex].index;
}
return FFX_OK;
}
+static uint32_t getPipelinePermutationFlags(uint32_t contextFlags, FfxFsr2Pass passId, bool fp16, bool force64, bool useLut)
+{
+    // Translate the context flags and per-pass options into the shader permutation bitmask.
+    uint32_t permutation = 0;
+    if (contextFlags & FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE) permutation |= FSR2_SHADER_PERMUTATION_HDR_COLOR_INPUT;
+    if (!(contextFlags & FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS)) permutation |= FSR2_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS;
+    if (contextFlags & FFX_FSR2_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) permutation |= FSR2_SHADER_PERMUTATION_JITTER_MOTION_VECTORS;
+    if (contextFlags & FFX_FSR2_ENABLE_DEPTH_INVERTED) permutation |= FSR2_SHADER_PERMUTATION_DEPTH_INVERTED;
+    if (passId == FFX_FSR2_PASS_ACCUMULATE_SHARPEN) permutation |= FSR2_SHADER_PERMUTATION_ENABLE_SHARPENING;
+    if (useLut) permutation |= FSR2_SHADER_PERMUTATION_USE_LANCZOS_TYPE;
+    if (force64) permutation |= FSR2_SHADER_PERMUTATION_FORCE_WAVE64;
+#if defined(_GAMING_XBOX)
+    // On Xbox, 16-bit math is enabled for every pass; the shaders use 32-bit only where it's necessary.
+    if (fp16) permutation |= FSR2_SHADER_PERMUTATION_ALLOW_FP16;
+#else
+    if (fp16 && (passId != FFX_FSR2_PASS_RCAS)) permutation |= FSR2_SHADER_PERMUTATION_ALLOW_FP16; // the RCAS pass never gets the FP16 permutation here
+#endif // defined(_GAMING_XBOX)
+    return permutation;
+}
static FfxErrorCode createPipelineStates(FfxFsr2Context_Private* context)
{
FFX_ASSERT(context);
- const size_t samplerCount = 2;
- FfxFilterType samplers[samplerCount];
- samplers[0] = FFX_FILTER_TYPE_POINT;
- samplers[1] = FFX_FILTER_TYPE_LINEAR;
-
- const size_t rootConstantCount = 2;
- uint32_t rootConstants[rootConstantCount];
- rootConstants[0] = sizeof(Fsr2Constants) / sizeof(uint32_t);
- rootConstants[1] = sizeof(Fsr2SecondaryUnion) / sizeof(uint32_t);
-
- FfxPipelineDescription pipelineDescription;
+ FfxPipelineDescription pipelineDescription = {};
pipelineDescription.contextFlags = context->contextDescription.flags;
- pipelineDescription.samplerCount = samplerCount;
- pipelineDescription.samplers = samplers;
- pipelineDescription.rootConstantBufferCount = rootConstantCount;
- pipelineDescription.rootConstantBufferSizes = rootConstants;
-
- // New interface: will handle RootSignature in backend
- // set up pipeline descriptor (basically RootSignature and binding)
- FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID, &pipelineDescription, &context->pipelineComputeLuminancePyramid));
- FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_RCAS, &pipelineDescription, &context->pipelineRCAS));
- FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_GENERATE_REACTIVE, &pipelineDescription, &context->pipelineGenerateReactive));
- FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_TCR_AUTOGENERATE, &pipelineDescription, &context->pipelineTcrAutogenerate));
+
+ // Samplers
+ pipelineDescription.samplerCount = 2;
+ FfxSamplerDescription samplerDescs[2] = { { FFX_FILTER_TYPE_MINMAGMIP_POINT, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE },
+ { FFX_FILTER_TYPE_MINMAGMIP_LINEAR, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE} };
+ pipelineDescription.samplers = samplerDescs;
+
+ // Root constants
+ pipelineDescription.rootConstantBufferCount = 2;
+ FfxRootConstantDescription rootConstantDescs[2] = { {sizeof(Fsr2Constants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE },
+ { sizeof(Fsr2SecondaryUnion) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE } };
+ pipelineDescription.rootConstants = rootConstantDescs;
+
+ // Query device capabilities
+ FfxDeviceCapabilities capabilities;
+ context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &capabilities);
+
+ // Setup a few options used to determine permutation flags
+ bool haveShaderModel66 = capabilities.maximumSupportedShaderModel >= FFX_SHADER_MODEL_6_6;
+ bool supportedFP16 = capabilities.fp16Supported;
+ bool canForceWave64 = false;
+ bool useLut = false;
+
+ const uint32_t waveLaneCountMin = capabilities.waveLaneCountMin;
+ const uint32_t waveLaneCountMax = capabilities.waveLaneCountMax;
+ if (waveLaneCountMin <= 64 && waveLaneCountMax >= 64)
+ {
+ useLut = true;
+ canForceWave64 = haveShaderModel66;
+ }
+ else
+ canForceWave64 = false;
+
+ // Context creation flags; passed to getPipelinePermutationFlags() for every pass created below.
+ uint32_t contextFlags = context->contextDescription.flags;
+
+ // Set up pipeline descriptor (basically RootSignature and binding)
+ wcscpy_s(pipelineDescription.name, L"FSR2-LUM_PYRAMID");
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID,
+ getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID, supportedFP16, canForceWave64, useLut),
+ &pipelineDescription, context->effectContextId, &context->pipelineComputeLuminancePyramid));
+ wcscpy_s(pipelineDescription.name, L"FSR2-RCAS");
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_RCAS,
+ getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_RCAS, supportedFP16, canForceWave64, useLut),
+ &pipelineDescription, context->effectContextId, &context->pipelineRCAS));
+ wcscpy_s(pipelineDescription.name, L"FSR2-GEN_REACTIVE");
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_GENERATE_REACTIVE,
+ getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_GENERATE_REACTIVE, supportedFP16, canForceWave64, useLut),
+ &pipelineDescription, context->effectContextId, &context->pipelineGenerateReactive));
+ wcscpy_s(pipelineDescription.name, L"FSR2-TCR_AUTOGENERATE");
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_TCR_AUTOGENERATE,
+ getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_TCR_AUTOGENERATE, supportedFP16, canForceWave64, useLut),
+ &pipelineDescription, context->effectContextId, &context->pipelineTcrAutogenerate));
pipelineDescription.rootConstantBufferCount = 1;
- FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_DEPTH_CLIP, &pipelineDescription, &context->pipelineDepthClip));
- FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH, &pipelineDescription, &context->pipelineReconstructPreviousDepth));
- FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_LOCK, &pipelineDescription, &context->pipelineLock));
- FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_ACCUMULATE, &pipelineDescription, &context->pipelineAccumulate));
- FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_ACCUMULATE_SHARPEN, &pipelineDescription, &context->pipelineAccumulateSharpen));
-
+
+ wcscpy_s(pipelineDescription.name, L"FSR2-DEPTH_CLIP");
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_DEPTH_CLIP,
+ getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_DEPTH_CLIP, supportedFP16, canForceWave64, useLut),
+ &pipelineDescription, context->effectContextId, &context->pipelineDepthClip));
+ wcscpy_s(pipelineDescription.name, L"FSR2-RECON_PREV_DEPTH");
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH,
+ getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH, supportedFP16, canForceWave64, useLut),
+ &pipelineDescription, context->effectContextId, &context->pipelineReconstructPreviousDepth));
+ wcscpy_s(pipelineDescription.name, L"FSR2-LOCK");
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_LOCK,
+ getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_LOCK, supportedFP16, canForceWave64, useLut),
+ &pipelineDescription, context->effectContextId, &context->pipelineLock));
+ wcscpy_s(pipelineDescription.name, L"FSR2-ACCUMULATE");
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_ACCUMULATE,
+ getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_ACCUMULATE, supportedFP16, canForceWave64, useLut),
+ &pipelineDescription, context->effectContextId, &context->pipelineAccumulate));
+ wcscpy_s(pipelineDescription.name, L"FSR2-ACCUM_SHARP");
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_ACCUMULATE_SHARPEN,
+ getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_ACCUMULATE_SHARPEN, supportedFP16, canForceWave64, useLut),
+ &pipelineDescription, context->effectContextId, &context->pipelineAccumulateSharpen));
+
// for each pipeline: re-route/fix-up IDs based on names
patchResourceBindings(&context->pipelineDepthClip);
patchResourceBindings(&context->pipelineReconstructPreviousDepth);
@@ -448,26 +494,27 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con
// Setup the data for implementation.
memset(context, 0, sizeof(FfxFsr2Context_Private));
- context->device = contextDescription->device;
+ context->device = contextDescription->backendInterface.device;
memcpy(&context->contextDescription, contextDescription, sizeof(FfxFsr2ContextDescription));
- if ((context->contextDescription.flags & FFX_FSR2_ENABLE_DEBUG_CHECKING) == FFX_FSR2_ENABLE_DEBUG_CHECKING)
- {
- if (context->contextDescription.fpMessage == nullptr)
- {
- FFX_ASSERT(context->contextDescription.fpMessage != nullptr);
- // remove the debug checking flag - we have no message function
- context->contextDescription.flags &= ~FFX_FSR2_ENABLE_DEBUG_CHECKING;
- }
- }
+ // Check version info - make sure we are linked with the right backend version
+ FfxVersionNumber version = context->contextDescription.backendInterface.fpGetSDKVersion(&context->contextDescription.backendInterface);
+ FFX_RETURN_ON_ERROR(version == FFX_SDK_MAKE_VERSION(1, 1, 4), FFX_ERROR_INVALID_VERSION);
+
+ // Setup constant buffer sizes.
+ context->constantBuffers[0].num32BitEntries = sizeof(Fsr2Constants) / sizeof(uint32_t);
+ context->constantBuffers[1].num32BitEntries = sizeof(Fsr2SpdConstants) / sizeof(uint32_t);
+ context->constantBuffers[2].num32BitEntries = sizeof(Fsr2RcasConstants) / sizeof(uint32_t);
+ context->constantBuffers[3].num32BitEntries = sizeof(Fsr2GenerateReactiveConstants) / sizeof(uint32_t);
- // Create the device.
- FfxErrorCode errorCode = context->contextDescription.callbacks.fpCreateBackendContext(&context->contextDescription.callbacks, context->device);
+ // Create the context.
+ FfxErrorCode errorCode =
+ context->contextDescription.backendInterface.fpCreateBackendContext(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, nullptr, &context->effectContextId);
FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
// call out for device caps.
- errorCode = context->contextDescription.callbacks.fpGetDeviceCapabilities(&context->contextDescription.callbacks, &context->deviceCapabilities, context->device);
+ errorCode = context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &context->deviceCapabilities);
FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
// set defaults
@@ -495,92 +542,307 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con
maximumBias[i] = int16_t(roundf(ffxFsr2MaximumBias[i] / 2.0f * 32767.0f));
}
- uint8_t defaultReactiveMaskData = 0U;
- uint32_t atomicInitData = 0U;
- float defaultExposure[] = { 0.0f, 0.0f };
- const FfxResourceType texture1dResourceType = (context->contextDescription.flags & FFX_FSR2_ENABLE_TEXTURE1D_USAGE) ? FFX_RESOURCE_TYPE_TEXTURE1D : FFX_RESOURCE_TYPE_TEXTURE2D;
-
// declare internal resources needed
- const Fsr2ResourceDescription internalSurfaceDesc[] = {
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV,
- FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV,
- FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1, L"FSR2_InternalDilatedVelocity1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2, L"FSR2_InternalDilatedVelocity2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"FSR2_DilatedDepth", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R32_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1, L"FSR2_LockStatus1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2, L"FSR2_LockStatus2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA, L"FSR2_LockInputLuma", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS, L"FSR2_NewLocks", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1, L"FSR2_InternalUpscaled1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2, L"FSR2_InternalUpscaled2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE, L"FSR2_ExposureMips", FFX_RESOURCE_USAGE_UAV,
- FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width / 2, contextDescription->maxRenderSize.height / 2, 0, FFX_RESOURCE_FLAGS_ALIASABLE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1, L"FSR2_LumaHistory1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2, L"FSR2_LumaHistory2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, L"FSR2_SpdAtomicCounter", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
- FFX_SURFACE_FORMAT_R32_UINT, 1, 1, 1, FFX_RESOURCE_FLAGS_ALIASABLE, sizeof(atomicInitData), &atomicInitData },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"FSR2_DilatedReactiveMasks", FFX_RESOURCE_USAGE_UAV,
- FFX_SURFACE_FORMAT_R8G8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT, L"FSR2_LanczosLutData", FFX_RESOURCE_USAGE_READ_ONLY,
- FFX_SURFACE_FORMAT_R16_SNORM, lanczos2LutWidth, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(lanczos2Weights), lanczos2Weights },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY, L"FSR2_DefaultReactiviyMask", FFX_RESOURCE_USAGE_READ_ONLY,
- FFX_SURFACE_FORMAT_R8_UNORM, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(defaultReactiveMaskData), &defaultReactiveMaskData },
-
- { FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT, L"FSR2_MaximumUpsampleBias", FFX_RESOURCE_USAGE_READ_ONLY,
- FFX_SURFACE_FORMAT_R16_SNORM, FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH, FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(maximumBias), maximumBias },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE, L"FSR2_DefaultExposure", FFX_RESOURCE_USAGE_READ_ONLY,
- FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(defaultExposure), defaultExposure },
-
- { FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE, L"FSR2_AutoExposure", FFX_RESOURCE_USAGE_UAV,
- FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE },
-
-
- // only one for now, will need pingpont to respect the motion vectors
- { FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE, L"FSR2_AutoReactive", FFX_RESOURCE_USAGE_UAV,
- FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
- { FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION, L"FSR2_AutoComposition", FFX_RESOURCE_USAGE_UAV,
- FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
- { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1, L"FSR2_PrevPreAlpha0", FFX_RESOURCE_USAGE_UAV,
- FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
- { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1, L"FSR2_PrevPostAlpha0", FFX_RESOURCE_USAGE_UAV,
- FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
- { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2, L"FSR2_PrevPreAlpha1", FFX_RESOURCE_USAGE_UAV,
- FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
- { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2, L"FSR2_PrevPostAlpha1", FFX_RESOURCE_USAGE_UAV,
- FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
-
+ const FfxInternalResourceDescription internalSurfaceDesc[] = {
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR,
+ L"FSR2_PreparedInputColor",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET),
+ FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_ALIASABLE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH,
+ L"FSR2_ReconstructedPrevNearestDepth",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32_UINT,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_ALIASABLE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1,
+ L"FSR2_InternalDilatedVelocity1",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET),
+ FFX_SURFACE_FORMAT_R16G16_FLOAT,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2,
+ L"FSR2_InternalDilatedVelocity2",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET),
+ FFX_SURFACE_FORMAT_R16G16_FLOAT,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH,
+ L"FSR2_DilatedDepth",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R32_FLOAT,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_ALIASABLE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1,
+ L"FSR2_LockStatus1",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R16G16_FLOAT,
+ contextDescription->displaySize.width,
+ contextDescription->displaySize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2,
+ L"FSR2_LockStatus2",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R16G16_FLOAT,
+ contextDescription->displaySize.width,
+ contextDescription->displaySize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA,
+ L"FSR2_LockInputLuma",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R16_FLOAT,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_ALIASABLE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS,
+ L"FSR2_NewLocks",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R8_UNORM,
+ contextDescription->displaySize.width,
+ contextDescription->displaySize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_ALIASABLE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1,
+ L"FSR2_InternalUpscaled1",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET),
+ FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT,
+ contextDescription->displaySize.width,
+ contextDescription->displaySize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2,
+ L"FSR2_InternalUpscaled2",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET),
+ FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT,
+ contextDescription->displaySize.width,
+ contextDescription->displaySize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE,
+ L"FSR2_ExposureMips",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16_FLOAT,
+ contextDescription->maxRenderSize.width / 2,
+ contextDescription->maxRenderSize.height / 2,
+ 0,
+ FFX_RESOURCE_FLAGS_ALIASABLE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1,
+ L"FSR2_LumaHistory1",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R8G8B8A8_UNORM,
+ contextDescription->displaySize.width,
+ contextDescription->displaySize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2,
+ L"FSR2_LumaHistory2",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R8G8B8A8_UNORM,
+ contextDescription->displaySize.width,
+ contextDescription->displaySize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT,
+ L"FSR2_SpdAtomicCounter",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32_UINT,
+ 1,
+ 1,
+ 1,
+ FFX_RESOURCE_FLAGS_ALIASABLE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_VALUE, sizeof(uint32_t), 0}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS,
+ L"FSR2_DilatedReactiveMasks",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET),
+ FFX_SURFACE_FORMAT_R8G8_UNORM,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_ALIASABLE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT,
+ L"FSR2_LanczosLutData",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_READ_ONLY,
+ FFX_SURFACE_FORMAT_R16_SNORM,
+ lanczos2LutWidth,
+ 1,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_BUFFER, sizeof(lanczos2Weights), lanczos2Weights}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY,
+ L"FSR2_DefaultReactivityMask",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_READ_ONLY,
+ FFX_SURFACE_FORMAT_R8_UNORM,
+ 1,
+ 1,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_VALUE, sizeof(uint8_t), 0}},
+
+ {FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT,
+ L"FSR2_MaximumUpsampleBias",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_READ_ONLY,
+ FFX_SURFACE_FORMAT_R16_SNORM,
+ FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH,
+ FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_BUFFER, sizeof(maximumBias), maximumBias}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE,
+ L"FSR2_DefaultExposure",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_READ_ONLY,
+ FFX_SURFACE_FORMAT_R32G32_FLOAT,
+ 1,
+ 1,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_VALUE, sizeof(float) * 2, 0}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE,
+ L"FSR2_AutoExposure",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32G32_FLOAT,
+ 1,
+ 1,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+
+ // only one for now, will need ping pong to respect the motion vectors
+ {FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE,
+ L"FSR2_AutoReactive",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UNORM,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION,
+ L"FSR2_AutoComposition",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UNORM,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1,
+ L"FSR2_PrevPreAlpha0",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R11G11B10_FLOAT,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1,
+ L"FSR2_PrevPostAlpha0",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R11G11B10_FLOAT,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2,
+ L"FSR2_PrevPreAlpha1",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R11G11B10_FLOAT,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
+
+ {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2,
+ L"FSR2_PrevPostAlpha1",
+ FFX_RESOURCE_TYPE_TEXTURE2D,
+ FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R11G11B10_FLOAT,
+ contextDescription->maxRenderSize.width,
+ contextDescription->maxRenderSize.height,
+ 1,
+ FFX_RESOURCE_FLAGS_NONE,
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}},
};
// clear the SRV resources to NULL.
@@ -588,13 +850,18 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con
for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) {
- const Fsr2ResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex];
- const FfxResourceType resourceType = currentSurfaceDescription->height > 1 ? FFX_RESOURCE_TYPE_TEXTURE2D : texture1dResourceType;
- const FfxResourceDescription resourceDescription = { resourceType, currentSurfaceDescription->format, currentSurfaceDescription->width, currentSurfaceDescription->height, 1, currentSurfaceDescription->mipCount };
+ const FfxInternalResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex];
+ const FfxResourceType resourceType = internalSurfaceDesc[currentSurfaceIndex].type;
+ const FfxResourceDescription resourceDescription = { resourceType, currentSurfaceDescription->format, currentSurfaceDescription->width, currentSurfaceDescription->height, 1, currentSurfaceDescription->mipCount, FFX_RESOURCE_FLAGS_NONE, currentSurfaceDescription->usage };
const FfxResourceStates initialState = (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_READ_ONLY) ? FFX_RESOURCE_STATE_COMPUTE_READ : FFX_RESOURCE_STATE_UNORDERED_ACCESS;
- const FfxCreateResourceDescription createResourceDescription = { FFX_HEAP_TYPE_DEFAULT, resourceDescription, initialState, currentSurfaceDescription->initDataSize, currentSurfaceDescription->initData, currentSurfaceDescription->name, currentSurfaceDescription->usage, currentSurfaceDescription->id };
-
- FFX_VALIDATE(context->contextDescription.callbacks.fpCreateResource(&context->contextDescription.callbacks, &createResourceDescription, &context->srvResources[currentSurfaceDescription->id]));
+ const FfxCreateResourceDescription createResourceDescription = {FFX_HEAP_TYPE_DEFAULT,
+ resourceDescription,
+ initialState,
+ currentSurfaceDescription->name,
+ currentSurfaceDescription->id,
+ currentSurfaceDescription->initData};
+
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreateResource(&context->contextDescription.backendInterface, &createResourceDescription, context->effectContextId, &context->srvResources[currentSurfaceDescription->id]));
}
// copy resources to uavResrouces list
@@ -602,48 +869,25 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con
// avoid compiling pipelines on first render
{
- context->refreshPipelineStates = false;
errorCode = createPipelineStates(context);
FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
}
return FFX_OK;
}
-static void fsr2SafeReleasePipeline(FfxFsr2Context_Private* context, FfxPipelineState* pipeline)
-{
- FFX_ASSERT(pipeline);
-
- context->contextDescription.callbacks.fpDestroyPipeline(&context->contextDescription.callbacks, pipeline);
-}
-
-static void fsr2SafeReleaseResource(FfxFsr2Context_Private* context, FfxResourceInternal resource)
-{
- context->contextDescription.callbacks.fpDestroyResource(&context->contextDescription.callbacks, resource);
-}
-
-static void fsr2SafeReleaseDevice(FfxFsr2Context_Private* context, FfxDevice* device)
-{
- if (*device == nullptr) {
- return;
- }
-
- context->contextDescription.callbacks.fpDestroyBackendContext(&context->contextDescription.callbacks);
- *device = nullptr;
-}
-
static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context)
{
FFX_ASSERT(context);
- fsr2SafeReleasePipeline(context, &context->pipelineDepthClip);
- fsr2SafeReleasePipeline(context, &context->pipelineReconstructPreviousDepth);
- fsr2SafeReleasePipeline(context, &context->pipelineLock);
- fsr2SafeReleasePipeline(context, &context->pipelineAccumulate);
- fsr2SafeReleasePipeline(context, &context->pipelineAccumulateSharpen);
- fsr2SafeReleasePipeline(context, &context->pipelineRCAS);
- fsr2SafeReleasePipeline(context, &context->pipelineComputeLuminancePyramid);
- fsr2SafeReleasePipeline(context, &context->pipelineGenerateReactive);
- fsr2SafeReleasePipeline(context, &context->pipelineTcrAutogenerate);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineDepthClip, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineReconstructPreviousDepth, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineLock, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineAccumulate, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineAccumulateSharpen, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineRCAS, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineComputeLuminancePyramid, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineGenerateReactive, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineTcrAutogenerate, context->effectContextId);
// unregister resources not created internally
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
@@ -658,13 +902,21 @@ static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context)
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
+ // Release the copy resources for those that had init data
+ ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT], context->effectContextId);
+ ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT], context->effectContextId);
+ ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY], context->effectContextId);
+ ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT], context->effectContextId);
+ ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE], context->effectContextId);
+
// release internal resources
for (int32_t currentResourceIndex = 0; currentResourceIndex < FFX_FSR2_RESOURCE_IDENTIFIER_COUNT; ++currentResourceIndex) {
- fsr2SafeReleaseResource(context, context->srvResources[currentResourceIndex]);
+ ffxSafeReleaseResource(&context->contextDescription.backendInterface, context->srvResources[currentResourceIndex], context->effectContextId);
}
- fsr2SafeReleaseDevice(context, &context->device);
+ // Destroy the context
+ context->contextDescription.backendInterface.fpDestroyBackendContext(&context->contextDescription.backendInterface, context->effectContextId);
return FFX_OK;
}
@@ -720,52 +972,58 @@ static void setupDeviceDepthToViewSpaceDepthParams(FfxFsr2Context_Private* conte
context->constants.deviceToViewDepth[3] = (1.0f / b);
}
-static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
+static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription*, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
{
- FfxComputeJobDescription jobDescriptor = {};
+ FfxGpuJobDescription dispatchJob = {FFX_GPU_JOB_COMPUTE};
+ wcscpy_s(dispatchJob.jobLabel, pipeline->name);
- for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvCount; ++currentShaderResourceViewIndex) {
+ for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) {
- const uint32_t currentResourceId = pipeline->srvResourceBindings[currentShaderResourceViewIndex].resourceIdentifier;
+ const uint32_t currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier;
const FfxResourceInternal currentResource = context->srvResources[currentResourceId];
- jobDescriptor.srvs[currentShaderResourceViewIndex] = currentResource;
- wcscpy_s(jobDescriptor.srvNames[currentShaderResourceViewIndex], pipeline->srvResourceBindings[currentShaderResourceViewIndex].name);
+ dispatchJob.computeJobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource;
+#ifdef FFX_DEBUG
+ wcscpy_s(dispatchJob.computeJobDescriptor.srvTextures[currentShaderResourceViewIndex].name,
+ pipeline->srvTextureBindings[currentShaderResourceViewIndex].name);
+#endif
}
- for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavCount; ++currentUnorderedAccessViewIndex) {
-
- const uint32_t currentResourceId = pipeline->uavResourceBindings[currentUnorderedAccessViewIndex].resourceIdentifier;
- wcscpy_s(jobDescriptor.uavNames[currentUnorderedAccessViewIndex], pipeline->uavResourceBindings[currentUnorderedAccessViewIndex].name);
+ for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavTextureCount; ++currentUnorderedAccessViewIndex) {
+ const uint32_t currentResourceId = pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].resourceIdentifier;
+#ifdef FFX_DEBUG
+ wcscpy_s(dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].name,
+ pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].name);
+#endif
if (currentResourceId >= FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 && currentResourceId <= FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12)
{
const FfxResourceInternal currentResource = context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE];
- jobDescriptor.uavs[currentUnorderedAccessViewIndex] = currentResource;
- jobDescriptor.uavMip[currentUnorderedAccessViewIndex] = currentResourceId - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0;
+ dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource;
+ dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip =
+ currentResourceId - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0;
}
else
{
const FfxResourceInternal currentResource = context->uavResources[currentResourceId];
- jobDescriptor.uavs[currentUnorderedAccessViewIndex] = currentResource;
- jobDescriptor.uavMip[currentUnorderedAccessViewIndex] = 0;
+ dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource;
+ dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = 0;
}
}
-
- jobDescriptor.dimensions[0] = dispatchX;
- jobDescriptor.dimensions[1] = dispatchY;
- jobDescriptor.dimensions[2] = 1;
- jobDescriptor.pipeline = *pipeline;
+
+ dispatchJob.computeJobDescriptor.dimensions[0] = dispatchX;
+ dispatchJob.computeJobDescriptor.dimensions[1] = dispatchY;
+ dispatchJob.computeJobDescriptor.dimensions[2] = 1;
+ dispatchJob.computeJobDescriptor.pipeline = *pipeline;
for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) {
- wcscpy_s( jobDescriptor.cbNames[currentRootConstantIndex], pipeline->cbResourceBindings[currentRootConstantIndex].name);
- jobDescriptor.cbs[currentRootConstantIndex] = globalFsr2ConstantBuffers[pipeline->cbResourceBindings[currentRootConstantIndex].resourceIdentifier];
- jobDescriptor.cbSlotIndex[currentRootConstantIndex] = pipeline->cbResourceBindings[currentRootConstantIndex].slotIndex;
+#ifdef FFX_DEBUG
+ wcscpy_s(dispatchJob.computeJobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name);
+#endif
+ dispatchJob.computeJobDescriptor.cbs[currentRootConstantIndex] = context->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier];
}
- FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE };
- dispatchJob.computeJobDescriptor = jobDescriptor;
- context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &dispatchJob);
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &dispatchJob);
}
static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params)
@@ -774,37 +1032,28 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
{
fsr2DebugCheckDispatch(context, params);
}
+
// take a short cut to the command list
FfxCommandList commandList = params->commandList;
- // try and refresh shaders first. Early exit in case of error.
- if (context->refreshPipelineStates) {
-
- context->refreshPipelineStates = false;
-
- const FfxErrorCode errorCode = createPipelineStates(context);
- FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
- }
-
if (context->firstExecution)
{
FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT };
+ wcscpy_s(clearJob.jobLabel, L"Zero initialize resource");
const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f };
memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float));
clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1];
- context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2];
- context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR];
- context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
}
// Prepare per frame descriptor tables
const bool isOddFrame = !!(context->resourceFrameIndex & 1);
- const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0;
- const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex;
const uint32_t lockStatusSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1;
const uint32_t lockStatusUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2;
const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1;
@@ -822,9 +1071,9 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
const bool resetAccumulation = params->reset || context->firstExecution;
context->firstExecution = false;
- context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->color, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
- context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->depth, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH]);
- context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->motionVectors, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS]);
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->color, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->depth, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH]);
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->motionVectors, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS]);
// if auto exposure is enabled use the auto exposure SRV, otherwise what the app sends.
if (context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) {
@@ -833,29 +1082,29 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
if (ffxFsr2ResourceIsNull(params->exposure)) {
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE];
} else {
- context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->exposure, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE]);
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->exposure, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE]);
}
}
-
+
if (params->enableAutoReactive)
{
- context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->colorOpaqueOnly, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]);
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->colorOpaqueOnly, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]);
}
-
+
if (ffxFsr2ResourceIsNull(params->reactive)) {
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY];
}
else {
- context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->reactive, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]);
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->reactive, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]);
}
-
+
if (ffxFsr2ResourceIsNull(params->transparencyAndComposition)) {
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY];
} else {
- context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->transparencyAndComposition, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK]);
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->transparencyAndComposition, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK]);
}
- context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->output, &context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT]);
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->output, context->effectContextId, &context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT]);
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS] = context->srvResources[lockStatusSrvResourceIndex];
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = context->srvResources[upscaledColorSrvResourceIndex];
context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS] = context->uavResources[lockStatusUavResourceIndex];
@@ -875,9 +1124,9 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR] = context->uavResources[prevPostAlphaColorUavResourceIndex];
// actual resource size may differ from render/display resolution (e.g. due to Hw/API restrictions), so query the descriptor for UVs adjustment
- const FfxResourceDescription resourceDescInputColor = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
- const FfxResourceDescription resourceDescLockStatus = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[lockStatusSrvResourceIndex]);
- const FfxResourceDescription resourceDescReactiveMask = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]);
+ const FfxResourceDescription resourceDescInputColor = context->contextDescription.backendInterface.fpGetResourceDescription(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+ const FfxResourceDescription resourceDescLockStatus = context->contextDescription.backendInterface.fpGetResourceDescription(&context->contextDescription.backendInterface, context->srvResources[lockStatusSrvResourceIndex]);
+ const FfxResourceDescription resourceDescReactiveMask = context->contextDescription.backendInterface.fpGetResourceDescription(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]);
FFX_ASSERT(resourceDescInputColor.type == FFX_RESOURCE_TYPE_TEXTURE2D);
FFX_ASSERT(resourceDescLockStatus.type == FFX_RESOURCE_TYPE_TEXTURE2D);
@@ -956,16 +1205,16 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
// reactive mask bias
const int32_t threadGroupWorkRegionDim = 8;
- const int32_t dispatchSrcX = (context->constants.renderSize[0] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
- const int32_t dispatchSrcY = (context->constants.renderSize[1] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
- const int32_t dispatchDstX = (context->contextDescription.displaySize.width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
- const int32_t dispatchDstY = (context->contextDescription.displaySize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+ const int32_t dispatchSrcX = FFX_DIVIDE_ROUNDING_UP(context->constants.renderSize[0], threadGroupWorkRegionDim);
+ const int32_t dispatchSrcY = FFX_DIVIDE_ROUNDING_UP(context->constants.renderSize[1], threadGroupWorkRegionDim);
+ const int32_t dispatchDstX = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.width, threadGroupWorkRegionDim);
+ const int32_t dispatchDstY = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.height, threadGroupWorkRegionDim);
// Clear reconstructed depth for max depth store.
if (resetAccumulation) {
FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT };
-
+ wcscpy_s(clearJob.jobLabel, L"Zero initialize resource");
// LockStatus resource has no sign bit, callback functions are compensating for this.
// Clearing the resource must follow the same logic.
float clearValuesLockStatus[4]{};
@@ -974,15 +1223,15 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
memcpy(clearJob.clearJobDescriptor.color, clearValuesLockStatus, 4 * sizeof(float));
clearJob.clearJobDescriptor.target = context->srvResources[lockStatusSrvResourceIndex];
- context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f };
memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float));
clearJob.clearJobDescriptor.target = context->srvResources[upscaledColorSrvResourceIndex];
- context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE];
- context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
//if (context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE)
// Auto exposure always used to track luma changes in locking logic
@@ -990,7 +1239,7 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
const float clearValuesExposure[]{ -1.f, 1e8f, 0.f, 0.f };
memcpy(clearJob.clearJobDescriptor.color, clearValuesExposure, 4 * sizeof(float));
clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE];
- context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
}
}
@@ -999,7 +1248,7 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
uint32_t workGroupOffset[2];
uint32_t numWorkGroupsAndMips[2];
uint32_t rectInfo[4] = { 0, 0, params->renderSize.width, params->renderSize.height };
- SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo);
+ ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo);
// downsample
Fsr2SpdConstants luminancePyramidConstants;
@@ -1022,10 +1271,22 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
genReactiveConsts.autoReactiveMax = params->autoReactiveMax;
// initialize constantBuffers data
- memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].data, &context->constants, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].uint32Size * sizeof(uint32_t));
- memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].data, &luminancePyramidConstants, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].uint32Size * sizeof(uint32_t));
- memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].data, &rcasConsts, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].uint32Size * sizeof(uint32_t));
- memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE].data, &genReactiveConsts, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE].uint32Size * sizeof(uint32_t));
+ context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface,
+ &context->constants,
+ sizeof(Fsr2Constants),
+ &context->constantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2]);
+ context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface,
+ &luminancePyramidConstants,
+ sizeof(Fsr2SpdConstants),
+ &context->constantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD]);
+ context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface,
+ &rcasConsts,
+ sizeof(Fsr2RcasConstants),
+ &context->constantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS]);
+ context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface,
+ &genReactiveConsts,
+ sizeof(Fsr2GenerateReactiveConstants),
+ &context->constantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE]);
// Auto reactive
if (params->enableAutoReactive)
@@ -1034,6 +1295,7 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION];
}
+
scheduleDispatch(context, params, &context->pipelineComputeLuminancePyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]);
scheduleDispatch(context, params, &context->pipelineReconstructPreviousDepth, dispatchSrcX, dispatchSrcY);
scheduleDispatch(context, params, &context->pipelineDepthClip, dispatchSrcX, dispatchSrcY);
@@ -1048,8 +1310,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
// dispatch RCAS
const int32_t threadGroupWorkRegionDimRCAS = 16;
- const int32_t dispatchX = (context->contextDescription.displaySize.width + (threadGroupWorkRegionDimRCAS - 1)) / threadGroupWorkRegionDimRCAS;
- const int32_t dispatchY = (context->contextDescription.displaySize.height + (threadGroupWorkRegionDimRCAS - 1)) / threadGroupWorkRegionDimRCAS;
+ const int32_t dispatchX = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.width, threadGroupWorkRegionDimRCAS);
+ const int32_t dispatchY = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.height, threadGroupWorkRegionDimRCAS);
scheduleDispatch(context, params, &context->pipelineRCAS, dispatchX, dispatchY);
}
@@ -1058,10 +1320,10 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
// Fsr2MaxQueuedFrames must be an even number.
FFX_STATIC_ASSERT((FSR2_MAX_QUEUED_FRAMES & 1) == 0);
- context->contextDescription.callbacks.fpExecuteGpuJobs(&context->contextDescription.callbacks, commandList);
+ context->contextDescription.backendInterface.fpExecuteGpuJobs(&context->contextDescription.backendInterface, commandList, context->effectContextId);
// release dynamic resources
- context->contextDescription.callbacks.fpUnregisterResources(&context->contextDescription.callbacks);
+ context->contextDescription.backendInterface.fpUnregisterResources(&context->contextDescription.backendInterface, commandList, context->effectContextId);
return FFX_OK;
}
@@ -1080,14 +1342,15 @@ FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2ContextD
FFX_ERROR_INVALID_POINTER);
// validate that all callbacks are set for the interface
- FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE);
- FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
- FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+ FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE);
+ FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE);
+ FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+ FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
// if a scratch buffer is declared, then we must have a size
- if (contextDescription->callbacks.scratchBuffer) {
+ if (contextDescription->backendInterface.scratchBuffer) {
- FFX_RETURN_ON_ERROR(contextDescription->callbacks.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE);
+ FFX_RETURN_ON_ERROR(contextDescription->backendInterface.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE);
}
// ensure the context is large enough for the internal context.
@@ -1100,6 +1363,21 @@ FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2ContextD
return errorCode;
}
+FFX_API FfxErrorCode ffxFsr2ContextGetGpuMemoryUsage(FfxFsr2Context* context, FfxEffectMemoryUsage* vramUsage)
+{
+ FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+ FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER);
+ FfxFsr2Context_Private* contextPrivate = (FfxFsr2Context_Private*)(context);
+
+ FFX_RETURN_ON_ERROR(contextPrivate->device, FFX_ERROR_NULL_DEVICE);
+
+ FfxErrorCode errorCode = contextPrivate->contextDescription.backendInterface.fpGetEffectGpuMemoryUsage(
+ &contextPrivate->contextDescription.backendInterface, contextPrivate->effectContextId, vramUsage);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+ return FFX_OK;
+}
+
FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* context)
{
FFX_RETURN_ON_ERROR(
@@ -1183,18 +1461,6 @@ FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode(
return FFX_OK;
}
-FfxErrorCode ffxFsr2ContextEnqueueRefreshPipelineRequest(FfxFsr2Context* context)
-{
- FFX_RETURN_ON_ERROR(
- context,
- FFX_ERROR_INVALID_POINTER);
-
- FfxFsr2Context_Private* contextPrivate = (FfxFsr2Context_Private*)context;
- contextPrivate->refreshPipelineStates = true;
-
- return FFX_OK;
-}
-
int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth)
{
const float basePhaseCount = 8.0f;
@@ -1245,12 +1511,6 @@ FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const F
contextPrivate->device,
FFX_ERROR_NULL_DEVICE);
- if (contextPrivate->refreshPipelineStates) {
-
- createPipelineStates(contextPrivate);
- contextPrivate->refreshPipelineStates = false;
- }
-
// take a short cut to the command list
FfxCommandList commandList = params->commandList;
@@ -1264,27 +1524,31 @@ FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const F
FfxResourceInternal internalReactive = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
FfxComputeJobDescription jobDescriptor = {};
- contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, ¶ms->colorOpaqueOnly, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]);
- contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, ¶ms->colorPreUpscale, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
- contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, ¶ms->outReactive, &contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]);
-
- jobDescriptor.uavs[0] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->colorOpaqueOnly, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]);
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->colorPreUpscale, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->outReactive, contextPrivate->effectContextId, &contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]);
- wcscpy_s(jobDescriptor.srvNames[0], pipeline->srvResourceBindings[0].name);
- wcscpy_s(jobDescriptor.srvNames[1], pipeline->srvResourceBindings[1].name);
- wcscpy_s(jobDescriptor.uavNames[0], pipeline->uavResourceBindings[0].name);
+ jobDescriptor.uavTextures[0].resource = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
+
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.srvTextures[0].name, pipeline->srvTextureBindings[0].name);
+ wcscpy_s(jobDescriptor.srvTextures[1].name, pipeline->srvTextureBindings[1].name);
+ wcscpy_s(jobDescriptor.uavTextures[0].name, pipeline->uavTextureBindings[0].name);
+#endif
jobDescriptor.dimensions[0] = dispatchSrcX;
jobDescriptor.dimensions[1] = dispatchSrcY;
jobDescriptor.dimensions[2] = 1;
jobDescriptor.pipeline = *pipeline;
- for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvCount; ++currentShaderResourceViewIndex) {
+ for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) {
- const uint32_t currentResourceId = pipeline->srvResourceBindings[currentShaderResourceViewIndex].resourceIdentifier;
+ const uint32_t currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier;
const FfxResourceInternal currentResource = contextPrivate->srvResources[currentResourceId];
- jobDescriptor.srvs[currentShaderResourceViewIndex] = currentResource;
- wcscpy_s(jobDescriptor.srvNames[currentShaderResourceViewIndex], pipeline->srvResourceBindings[currentShaderResourceViewIndex].name);
+ jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource;
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name);
+#endif
}
Fsr2GenerateReactiveConstants constants = {};
@@ -1293,34 +1557,33 @@ FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const F
constants.binaryValue = params->binaryValue;
constants.flags = params->flags;
- jobDescriptor.cbs[0].uint32Size = sizeof(constants);
- memcpy(&jobDescriptor.cbs[0].data, &constants, sizeof(constants));
- wcscpy_s(jobDescriptor.cbNames[0], pipeline->cbResourceBindings[0].name);
-
+ contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&contextPrivate->contextDescription.backendInterface,
+ &constants,
+ sizeof(constants),
+ &jobDescriptor.cbs[0]);
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.cbNames[0], pipeline->constantBufferBindings[0].name);
+#endif
FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE };
+ wcscpy_s(dispatchJob.jobLabel, pipeline->name);
dispatchJob.computeJobDescriptor = jobDescriptor;
- contextPrivate->contextDescription.callbacks.fpScheduleGpuJob(&contextPrivate->contextDescription.callbacks, &dispatchJob);
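+ //contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &dispatchJob); // NOTE(review): with this call disabled, dispatchJob is built above but never scheduled in this function before fpExecuteGpuJobs runs - confirm this is intentional.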
- contextPrivate->contextDescription.callbacks.fpExecuteGpuJobs(&contextPrivate->contextDescription.callbacks, commandList);
+ contextPrivate->contextDescription.backendInterface.fpExecuteGpuJobs(
+ &contextPrivate->contextDescription.backendInterface, commandList, contextPrivate->effectContextId);
// restore internal reactive
contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE] = internalReactive;
+ // release dynamic resources
+ contextPrivate->contextDescription.backendInterface.fpUnregisterResources(&contextPrivate->contextDescription.backendInterface, commandList, contextPrivate->effectContextId);
+
return FFX_OK;
}
static FfxErrorCode generateReactiveMaskInternal(FfxFsr2Context_Private* contextPrivate, const FfxFsr2DispatchDescription* params)
{
- if (contextPrivate->refreshPipelineStates) {
-
- createPipelineStates(contextPrivate);
- contextPrivate->refreshPipelineStates = false;
- }
-
- // take a short cut to the command list
- FfxCommandList commandList = params->commandList;
-
FfxPipelineState* pipeline = &contextPrivate->pipelineTcrAutogenerate;
const int32_t threadGroupWorkRegionDim = 8;
@@ -1328,42 +1591,60 @@ static FfxErrorCode generateReactiveMaskInternal(FfxFsr2Context_Private* context
const int32_t dispatchSrcY = (params->renderSize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
FfxComputeJobDescription jobDescriptor = {};
- contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, ¶ms->colorOpaqueOnly, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]);
- contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, ¶ms->color, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
-
- jobDescriptor.uavs[0] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
- jobDescriptor.uavs[1] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION];
- jobDescriptor.uavs[2] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR];
- jobDescriptor.uavs[3] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR];
-
- wcscpy_s(jobDescriptor.uavNames[0], pipeline->uavResourceBindings[0].name);
- wcscpy_s(jobDescriptor.uavNames[1], pipeline->uavResourceBindings[1].name);
- wcscpy_s(jobDescriptor.uavNames[2], pipeline->uavResourceBindings[2].name);
- wcscpy_s(jobDescriptor.uavNames[3], pipeline->uavResourceBindings[3].name);
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->colorOpaqueOnly, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]);
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->color, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+
+ jobDescriptor.uavTextures[0].resource = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
+ jobDescriptor.uavTextures[1].resource = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION];
+ jobDescriptor.uavTextures[2].resource = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR];
+ jobDescriptor.uavTextures[3].resource = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR];
+
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.uavTextures[0].name, pipeline->uavTextureBindings[0].name);
+ wcscpy_s(jobDescriptor.uavTextures[1].name, pipeline->uavTextureBindings[1].name);
+ wcscpy_s(jobDescriptor.uavTextures[2].name, pipeline->uavTextureBindings[2].name);
+ wcscpy_s(jobDescriptor.uavTextures[3].name, pipeline->uavTextureBindings[3].name);
+#endif
jobDescriptor.dimensions[0] = dispatchSrcX;
jobDescriptor.dimensions[1] = dispatchSrcY;
jobDescriptor.dimensions[2] = 1;
jobDescriptor.pipeline = *pipeline;
- for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvCount; ++currentShaderResourceViewIndex) {
+ for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) {
- const uint32_t currentResourceId = pipeline->srvResourceBindings[currentShaderResourceViewIndex].resourceIdentifier;
+ const uint32_t currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier;
const FfxResourceInternal currentResource = contextPrivate->srvResources[currentResourceId];
- jobDescriptor.srvs[currentShaderResourceViewIndex] = currentResource;
- wcscpy_s(jobDescriptor.srvNames[currentShaderResourceViewIndex], pipeline->srvResourceBindings[currentShaderResourceViewIndex].name);
+ jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource;
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name);
+#endif
}
for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) {
- wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->cbResourceBindings[currentRootConstantIndex].name);
- jobDescriptor.cbs[currentRootConstantIndex] = globalFsr2ConstantBuffers[pipeline->cbResourceBindings[currentRootConstantIndex].resourceIdentifier];
- jobDescriptor.cbSlotIndex[currentRootConstantIndex] = pipeline->cbResourceBindings[currentRootConstantIndex].slotIndex;
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name);
+#endif
+ jobDescriptor.cbs[currentRootConstantIndex] = contextPrivate->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier];
+ //jobDescriptor.cbSlotIndex[currentRootConstantIndex] = pipeline->constantBufferBindings[currentRootConstantIndex].slotIndex;
}
FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE };
+ wcscpy_s(dispatchJob.jobLabel, pipeline->name);
dispatchJob.computeJobDescriptor = jobDescriptor;
- contextPrivate->contextDescription.callbacks.fpScheduleGpuJob(&contextPrivate->contextDescription.callbacks, &dispatchJob);
+ contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &dispatchJob);
+
+ return FFX_OK;
+}
+
+FFX_API FfxVersionNumber ffxFsr2GetEffectVersion()
+{
+ return FFX_SDK_MAKE_VERSION(FFX_FSR2_VERSION_MAJOR, FFX_FSR2_VERSION_MINOR, FFX_FSR2_VERSION_PATCH);
+}
+FFX_API FfxErrorCode ffxFsr2SetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel)
+{
+ ffxSetPrintMessageCallback(fpMessage, debugLevel);
return FFX_OK;
}
diff --git a/thirdparty/amd-fsr2/ffx_fsr2.h b/thirdparty/amd-ffx/ffx_fsr2.h
similarity index 77%
rename from thirdparty/amd-fsr2/ffx_fsr2.h
rename to thirdparty/amd-ffx/ffx_fsr2.h
index dfcd4caf3503..86d89b3166ef 100644
--- a/thirdparty/amd-fsr2/ffx_fsr2.h
+++ b/thirdparty/amd-ffx/ffx_fsr2.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -19,38 +20,80 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
-
-// @defgroup FSR2
-
#pragma once
// Include the interface for the backend of the FSR2 API.
-#include "ffx_fsr2_interface.h"
+#include "ffx_interface.h"
+
+/// @defgroup ffxFsr2 FidelityFX FSR2
+/// FidelityFX Super Resolution 2 runtime library
+///
+/// @ingroup SDKComponents
/// FidelityFX Super Resolution 2 major version.
///
-/// @ingroup FSR2
+/// @ingroup ffxFsr2
#define FFX_FSR2_VERSION_MAJOR (2)
/// FidelityFX Super Resolution 2 minor version.
///
-/// @ingroup FSR2
-#define FFX_FSR2_VERSION_MINOR (2)
+/// @ingroup ffxFsr2
+#define FFX_FSR2_VERSION_MINOR (3)
/// FidelityFX Super Resolution 2 patch version.
///
-/// @ingroup FSR2
-#define FFX_FSR2_VERSION_PATCH (1)
+/// @ingroup ffxFsr2
+#define FFX_FSR2_VERSION_PATCH (3)
+
+/// FidelityFX Super Resolution 2 context count
+///
+/// Defines the number of internal effect contexts required by FSR2
+///
+/// @ingroup ffxFsr2
+#define FFX_FSR2_CONTEXT_COUNT 1
/// The size of the context specified in 32bit values.
///
-/// @ingroup FSR2
-#define FFX_FSR2_CONTEXT_SIZE (16536)
+/// @ingroup ffxFsr2
+#define FFX_FSR2_CONTEXT_SIZE (FFX_SDK_DEFAULT_CONTEXT_SIZE)
#if defined(__cplusplus)
extern "C" {
#endif // #if defined(__cplusplus)
+/// An enumeration of all the passes which constitute the FSR2 algorithm.
+///
+/// FSR2 is implemented as a composite of several compute passes each
+/// computing a key part of the final result. Each call to the
+/// FfxFsr2ScheduleGpuJobFunc callback function will
+/// correspond to a single pass included in FfxFsr2Pass. For a
+/// more comprehensive description of each pass, please refer to the FSR2
+/// reference documentation.
+///
+/// Please note in some cases e.g.: FFX_FSR2_PASS_ACCUMULATE
+/// and FFX_FSR2_PASS_ACCUMULATE_SHARPEN either one pass or the
+/// other will be used (they are mutually exclusive). The choice of which will
+/// depend on the way the FfxFsr2Context is created and the
+/// precise contents of FfxFsr2DispatchDescription each time a call
+/// is made to ffxFsr2ContextDispatch.
+///
+/// @ingroup ffxFsr2
+typedef enum FfxFsr2Pass
+{
+
+ FFX_FSR2_PASS_DEPTH_CLIP = 0, ///< A pass which performs depth clipping.
+ FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH = 1, ///< A pass which performs reconstruction of previous frame's depth.
+ FFX_FSR2_PASS_LOCK = 2, ///< A pass which calculates pixel locks.
+ FFX_FSR2_PASS_ACCUMULATE = 3, ///< A pass which performs upscaling.
+ FFX_FSR2_PASS_ACCUMULATE_SHARPEN = 4, ///< A pass which performs upscaling when sharpening is used.
+ FFX_FSR2_PASS_RCAS = 5, ///< A pass which performs sharpening.
+ FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID = 6, ///< A pass which generates the luminance mipmap chain for the current frame.
+ FFX_FSR2_PASS_GENERATE_REACTIVE = 7, ///< An optional pass to generate a reactive mask.
+ FFX_FSR2_PASS_TCR_AUTOGENERATE = 8, ///< An optional pass to automatically generate transparency/composition and reactive masks.
+
+ FFX_FSR2_PASS_COUNT ///< The number of passes performed by FSR2.
+} FfxFsr2Pass;
+
/// An enumeration of all the quality modes supported by FidelityFX Super
/// Resolution 2 upscaling.
///
@@ -70,7 +113,7 @@ extern "C" {
/// of using this scaling mode before deciding if you should include it in your
/// application.
///
-/// @ingroup FSR2
+/// @ingroup ffxFsr2
typedef enum FfxFsr2QualityMode {
FFX_FSR2_QUALITY_MODE_QUALITY = 1, ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x.
@@ -82,7 +125,7 @@ typedef enum FfxFsr2QualityMode {
/// An enumeration of bit flags used when creating a
/// FfxFsr2Context. See FfxFsr2ContextDescription.
///
-/// @ingroup FSR2
+/// @ingroup ffxFsr2
typedef enum FfxFsr2InitializationFlagBits {
FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE = (1<<0), ///< A bit indicating if the input color data provided is using a high-dynamic range.
@@ -96,25 +139,36 @@ typedef enum FfxFsr2InitializationFlagBits {
FFX_FSR2_ENABLE_DEBUG_CHECKING = (1<<8), ///< A bit indicating that the runtime should check some API values and report issues.
} FfxFsr2InitializationFlagBits;
+/// Pass a string message
+///
+/// Used for debug messages.
+///
+/// @param [in] type The type of message.
+/// @param [in] message A string message to pass.
+///
+///
+/// @ingroup ffxFsr2
+typedef void(*FfxFsr2Message)(
+ FfxMsgType type,
+ const wchar_t* message);
+
/// A structure encapsulating the parameters required to initialize FidelityFX
/// Super Resolution 2 upscaling.
///
-/// @ingroup FSR2
+/// @ingroup ffxFsr2
typedef struct FfxFsr2ContextDescription {
uint32_t flags; ///< A collection of FfxFsr2InitializationFlagBits.
FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at.
FfxDimensions2D displaySize; ///< The size of the presentation resolution targeted by the upscaling process.
- FfxFsr2Interface callbacks; ///< A set of pointers to the backend implementation for FSR 2.0.
- FfxDevice device; ///< The abstracted device which is passed to some callback functions.
-
- FfxFsr2Message fpMessage; ///< A pointer to a function that can recieve messages from the runtime.
+ FfxFsr2Message fpMessage; ///< A pointer to a function that can receive messages from the runtime.
+ FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FidelityFX SDK
} FfxFsr2ContextDescription;
/// A structure encapsulating the parameters for dispatching the various passes
/// of FidelityFX Super Resolution 2.
///
-/// @ingroup FSR2
+/// @ingroup ffxFsr2
typedef struct FfxFsr2DispatchDescription {
FfxCommandList commandList; ///< The FfxCommandList to record FSR2 rendering commands into.
@@ -146,12 +200,15 @@ typedef struct FfxFsr2DispatchDescription {
float autoReactiveScale; ///< A value to scale the reactive mask
float autoReactiveMax; ///< A value to clamp the reactive mask
- float reprojectionMatrix[16]; ///< The matrix used for reprojecting pixels with invalid motion vectors by using the depth.
+ // GODOT BEGINS
+ float reprojectionMatrix[16];
+ // GODOT ENDS
+
} FfxFsr2DispatchDescription;
/// A structure encapsulating the parameters for automatic generation of a reactive mask
///
-/// @ingroup FSR2
+/// @ingroup ffxFsr2
typedef struct FfxFsr2GenerateReactiveDescription {
FfxCommandList commandList; ///< The FfxCommandList to record FSR2 rendering commands into.
@@ -176,12 +233,13 @@ typedef struct FfxFsr2GenerateReactiveDescription {
/// It is therefore recommended that the GPU is idle before destroying the
/// FSR2 context.
///
-/// @ingroup FSR2
-typedef struct FfxFsr2Context {
-
- uint32_t data[FFX_FSR2_CONTEXT_SIZE]; ///< An opaque set of uint32_t which contain the data for the context.
+/// @ingroup ffxFsr2
+typedef struct FfxFsr2Context
+{
+ uint32_t data[FFX_FSR2_CONTEXT_SIZE]; ///< An opaque set of uint32_t which contain the data for the context.
} FfxFsr2Context;
+
/// Create a FidelityFX Super Resolution 2 context from the parameters
/// programmed to the FfxFsr2CreateParams structure.
///
@@ -213,8 +271,8 @@ typedef struct FfxFsr2Context {
/// disabled by a user. To destroy the FSR2 context you should call
/// ffxFsr2ContextDestroy.
///
-/// @param [out] context A pointer to a FfxFsr2Context structure to populate.
-/// @param [in] contextDescription A pointer to a FfxFsr2ContextDescription structure.
+/// @param [out] pContext A pointer to a FfxFsr2Context structure to populate.
+/// @param [in] pContextDescription A pointer to a FfxFsr2ContextDescription structure.
///
/// @retval
/// FFX_OK The operation completed successfully.
@@ -225,8 +283,21 @@ typedef struct FfxFsr2Context {
/// @retval
/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
///
-/// @ingroup FSR2
-FFX_API FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2ContextDescription* contextDescription);
+/// @ingroup ffxFsr2
+FFX_API FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* pContext, const FfxFsr2ContextDescription* pContextDescription);
+
+/// Get GPU memory usage of the FidelityFX Super Resolution context.
+///
+/// @param [in] pContext A pointer to a FfxFsr2Context structure.
+/// @param [out] pVramUsage A pointer to a FfxEffectMemoryUsage structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_INVALID_POINTER The operation failed because either pContext or pVramUsage was NULL.
+///
+/// @ingroup ffxFsr2
+FFX_API FfxErrorCode ffxFsr2ContextGetGpuMemoryUsage(FfxFsr2Context* pContext, FfxEffectMemoryUsage* pVramUsage);
/// Dispatch the various passes that constitute FidelityFX Super Resolution 2.
///
@@ -252,8 +323,8 @@ FFX_API FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2
/// documentation for ffxFsr2GetJitterOffset as well as the
/// accompanying overview documentation for FSR2.
///
-/// @param [in] context A pointer to a FfxFsr2Context structure.
-/// @param [in] dispatchDescription A pointer to a FfxFsr2DispatchDescription structure.
+/// @param [in] pContext A pointer to a FfxFsr2Context structure.
+/// @param [in] pDispatchDescription A pointer to a FfxFsr2DispatchDescription structure.
///
/// @retval
/// FFX_OK The operation completed successfully.
@@ -266,31 +337,31 @@ FFX_API FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2
/// @retval
/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
///
-/// @ingroup FSR2
-FFX_API FfxErrorCode ffxFsr2ContextDispatch(FfxFsr2Context* context, const FfxFsr2DispatchDescription* dispatchDescription);
+/// @ingroup ffxFsr2
+FFX_API FfxErrorCode ffxFsr2ContextDispatch(FfxFsr2Context* pContext, const FfxFsr2DispatchDescription* pDispatchDescription);
/// A helper function generate a Reactive mask from an opaque only texure and one containing translucent objects.
///
-/// @param [in] context A pointer to a FfxFsr2Context structure.
-/// @param [in] params A pointer to a FfxFsr2GenerateReactiveDescription structure
+/// @param [in] pContext A pointer to a FfxFsr2Context structure.
+/// @param [in] pParams A pointer to a FfxFsr2GenerateReactiveDescription structure
///
/// @retval
/// FFX_OK The operation completed successfully.
///
-/// @ingroup FSR2
-FFX_API FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const FfxFsr2GenerateReactiveDescription* params);
+/// @ingroup ffxFsr2
+FFX_API FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* pContext, const FfxFsr2GenerateReactiveDescription* pParams);
/// Destroy the FidelityFX Super Resolution context.
///
-/// @param [out] context A pointer to a FfxFsr2Context structure to destroy.
+/// @param [out] pContext A pointer to a FfxFsr2Context structure to destroy.
///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context was NULL.
///
-/// @ingroup FSR2
-FFX_API FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* context);
+/// @ingroup ffxFsr2
+FFX_API FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* pContext);
/// Get the upscale ratio from the quality mode.
///
@@ -312,7 +383,7 @@ FFX_API FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* context);
/// The upscaling the per-dimension upscaling ratio for
/// qualityMode according to the table above.
///
-/// @ingroup FSR2
+/// @ingroup ffxFsr2
FFX_API float ffxFsr2GetUpscaleRatioFromQualityMode(FfxFsr2QualityMode qualityMode);
/// A helper function to calculate the rendering resolution from a target
@@ -321,8 +392,8 @@ FFX_API float ffxFsr2GetUpscaleRatioFromQualityMode(FfxFsr2QualityMode qualityMo
/// This function applies the scaling factor returned by
/// ffxFsr2GetUpscaleRatioFromQualityMode to each dimension.
///
-/// @param [out] renderWidth A pointer to a uint32_t which will hold the calculated render resolution width.
-/// @param [out] renderHeight A pointer to a uint32_t which will hold the calculated render resolution height.
+/// @param [out] pRenderWidth A pointer to a uint32_t which will hold the calculated render resolution width.
+/// @param [out] pRenderHeight A pointer to a uint32_t which will hold the calculated render resolution height.
/// @param [in] displayWidth The target display resolution width.
/// @param [in] displayHeight The target display resolution height.
/// @param [in] qualityMode The desired quality mode for FSR 2 upscaling.
@@ -334,10 +405,10 @@ FFX_API float ffxFsr2GetUpscaleRatioFromQualityMode(FfxFsr2QualityMode qualityMo
/// @retval
/// FFX_ERROR_INVALID_ENUM An invalid quality mode was specified.
///
-/// @ingroup FSR2
+/// @ingroup ffxFsr2
FFX_API FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode(
- uint32_t* renderWidth,
- uint32_t* renderHeight,
+ uint32_t* pRenderWidth,
+ uint32_t* pRenderHeight,
uint32_t displayWidth,
uint32_t displayHeight,
FfxFsr2QualityMode qualityMode);
@@ -348,7 +419,7 @@ FFX_API FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode(
/// For more detailed information about the application of camera jitter to
/// your application's rendering please refer to the
/// ffxFsr2GetJitterOffset function.
-///
+///
/// The table below shows the jitter phase count which this function
/// would return for each of the quality presets.
///
@@ -366,7 +437,7 @@ FFX_API FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode(
/// @returns
/// The jitter phase count for the scaling factor between renderWidth and displayWidth.
///
-/// @ingroup FSR2
+/// @ingroup ffxFsr2
FFX_API int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth);
/// A helper function to calculate the subpixel jitter offset.
@@ -398,12 +469,12 @@ FFX_API int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayW
/// float jitterX = 0;
/// float jitterY = 0;
/// ffxFsr2GetJitterOffset(&jitterX, &jitterY, index, jitterPhaseCount);
-///
+///
/// const float jitterX = 2.0f * jitterX / (float)renderWidth;
/// const float jitterY = -2.0f * jitterY / (float)renderHeight;
/// const Matrix4 jitterTranslationMatrix = translateMatrix(Matrix3::identity, Vector3(jitterX, jitterY, 0));
/// const Matrix4 jitteredProjectionMatrix = jitterTranslationMatrix * projectionMatrix;
-///
+///
/// Jitter should be applied to all rendering. This includes opaque, alpha
/// transparent, and raytraced objects. For rasterized objects, the sub-pixel
/// jittering values calculated by the iffxFsr2GetJitterOffset
@@ -411,31 +482,31 @@ FFX_API int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayW
/// used to perform transformations during vertex shading. For raytraced
/// rendering, the sub-pixel jitter should be applied to the ray's origin,
/// often the camera's position.
-///
+///
/// Whether you elect to use the ffxFsr2GetJitterOffset function
/// or your own sequence generator, you must program the
/// jitterOffset field of the
/// FfxFsr2DispatchParameters structure in order to inform FSR2
/// of the jitter offset that has been applied in order to render each frame.
-///
+///
/// If not using the recommended ffxFsr2GetJitterOffset function,
/// care should be taken that your jitter sequence never generates a null vector;
/// that is value of 0 in both the X and Y dimensions.
///
-/// @param [out] outX A pointer to a float which will contain the subpixel jitter offset for the x dimension.
-/// @param [out] outY A pointer to a float which will contain the subpixel jitter offset for the y dimension.
+/// @param [out] pOutX A pointer to a float which will contain the subpixel jitter offset for the x dimension.
+/// @param [out] pOutY A pointer to a float which will contain the subpixel jitter offset for the y dimension.
/// @param [in] index The index within the jitter sequence.
/// @param [in] phaseCount The length of jitter phase. See ffxFsr2GetJitterPhaseCount.
-///
+///
/// @retval
/// FFX_OK The operation completed successfully.
/// @retval
/// FFX_ERROR_INVALID_POINTER Either outX or outY was NULL.
/// @retval
/// FFX_ERROR_INVALID_ARGUMENT Argument phaseCount must be greater than 0.
-///
-/// @ingroup FSR2
-FFX_API FfxErrorCode ffxFsr2GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount);
+///
+/// @ingroup ffxFsr2
+FFX_API FfxErrorCode ffxFsr2GetJitterOffset(float* pOutX, float* pOutY, int32_t index, int32_t phaseCount);
/// A helper function to check if a resource is
/// FFX_FSR2_RESOURCE_IDENTIFIER_NULL.
@@ -447,9 +518,26 @@ FFX_API FfxErrorCode ffxFsr2GetJitterOffset(float* outX, float* outY, int32_t in
/// @returns
/// false The resource was FFX_FSR2_RESOURCE_IDENTIFIER_NULL.
///
-/// @ingroup FSR2
+/// @ingroup ffxFsr2
FFX_API bool ffxFsr2ResourceIsNull(FfxResource resource);
+/// Queries the effect version number.
+///
+/// @returns
+/// The SDK version the effect was built with.
+///
+/// @ingroup ffxFsr2
+FFX_API FfxVersionNumber ffxFsr2GetEffectVersion();
+
+/// Set global debug message settings
+///
+/// @param [in] fpMessage A ffxMessageCallback
+/// @param [in] debugLevel An unsigned integer. Unimplemented.
+/// @retval
+/// FFX_OK The operation completed successfully.
+///
+/// @ingroup ffxFsr2
+FFX_API FfxErrorCode ffxFsr2SetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel);
#if defined(__cplusplus)
}
#endif // #if defined(__cplusplus)
diff --git a/thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h b/thirdparty/amd-ffx/ffx_fsr2_maximum_bias.h
similarity index 89%
rename from thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h
rename to thirdparty/amd-ffx/ffx_fsr2_maximum_bias.h
index 5fdbd0cdcd00..a6847e357f71 100644
--- a/thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h
+++ b/thirdparty/amd-ffx/ffx_fsr2_maximum_bias.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -23,8 +24,8 @@
#pragma once
-static const int FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH = 16;
-static const int FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT = 16;
+static const int32_t FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH = 16;
+static const int32_t FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT = 16;
static const float ffxFsr2MaximumBias[] = {
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.876f, 1.809f, 1.772f, 1.753f, 1.748f,
2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.869f, 1.801f, 1.764f, 1.745f, 1.739f,
diff --git a/thirdparty/amd-fsr2/ffx_fsr2_private.h b/thirdparty/amd-ffx/ffx_fsr2_private.h
similarity index 65%
rename from thirdparty/amd-fsr2/ffx_fsr2_private.h
rename to thirdparty/amd-ffx/ffx_fsr2_private.h
index 8a9aec577886..d2840aae7bc7 100644
--- a/thirdparty/amd-fsr2/ffx_fsr2_private.h
+++ b/thirdparty/amd-ffx/ffx_fsr2_private.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -20,8 +21,29 @@
// THE SOFTWARE.
#pragma once
+#include "gpu/fsr2/ffx_fsr2_resources.h"
+
+/// Bit flags describing the shader permutations that can be requested for the FSR2 algorithm.
+///
+/// FSR2 features are organized through a set of pre-defined compile
+/// permutation options that need to be specified. Which shader blob
+/// is returned for pipeline creation is determined by the combination
+/// of permutation flags that are enabled. Every enumerator occupies its
+/// own bit, so several options can be OR-ed into a single mask.
+///
+/// NOTE(review): the type is spelled `Fs2...` rather than `Fsr2...`; the name
+/// is part of the interface and is intentionally left unchanged here.
+///
+/// @ingroup FSR2
+typedef enum Fs2ShaderPermutationOptions
+{
+ FSR2_SHADER_PERMUTATION_USE_LANCZOS_TYPE = (1 << 0), ///< Off means reference, On means LUT
+ FSR2_SHADER_PERMUTATION_HDR_COLOR_INPUT = (1 << 1), ///< Enables the HDR code path
+ FSR2_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS = (1 << 2), ///< Indicates low resolution motion vectors provided
+ FSR2_SHADER_PERMUTATION_JITTER_MOTION_VECTORS = (1 << 3), ///< Indicates motion vectors were generated with jitter
+ FSR2_SHADER_PERMUTATION_DEPTH_INVERTED = (1 << 4), ///< Indicates input resources were generated with inverted depth
+ FSR2_SHADER_PERMUTATION_ENABLE_SHARPENING = (1 << 5), ///< Enables a supplementary sharpening pass
+ FSR2_SHADER_PERMUTATION_FORCE_WAVE64 = (1 << 6), ///< Doesn't map to a define, selects a different table
+ FSR2_SHADER_PERMUTATION_ALLOW_FP16 = (1 << 7), ///< Enables fast math computations where possible
+} Fs2ShaderPermutationOptions;
-// Constants for FSR2 DX12 dispatches. Must be kept in sync with cbFSR2 in ffx_fsr2_callbacks_hlsl.h
+// Constants for FSR2 dispatches. Must be kept in sync with cbFSR2 in ffx_fsr2_callbacks_hlsl.h
typedef struct Fsr2Constants {
int32_t renderSize[2];
@@ -31,7 +53,7 @@ typedef struct Fsr2Constants {
int32_t lumaMipDimensions[2];
int32_t lumaMipLevelToUse;
int32_t frameIndex;
-
+
float deviceToViewDepth[4];
float jitterOffset[2];
float motionVectorScale[2];
@@ -45,20 +67,22 @@ typedef struct Fsr2Constants {
float dynamicResChangeFactor;
float viewSpaceToMetersFactor;
+ // GODOT BEGINS
float pad;
float reprojectionMatrix[16];
+ // GODOT ENDS
} Fsr2Constants;
struct FfxFsr2ContextDescription;
struct FfxDeviceCapabilities;
struct FfxPipelineState;
-struct FfxResource;
// FfxFsr2Context_Private
// The private implementation of the FSR2 context.
typedef struct FfxFsr2Context_Private {
FfxFsr2ContextDescription contextDescription;
+ FfxUInt32 effectContextId;
Fsr2Constants constants;
FfxDevice device;
FfxDeviceCapabilities deviceCapabilities;
@@ -71,13 +95,13 @@ typedef struct FfxFsr2Context_Private {
FfxPipelineState pipelineComputeLuminancePyramid;
FfxPipelineState pipelineGenerateReactive;
FfxPipelineState pipelineTcrAutogenerate;
+ FfxConstantBuffer constantBuffers[4];
// 2 arrays of resources, as e.g. FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS will use different resources when bound as SRV vs when bound as UAV
FfxResourceInternal srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_COUNT];
FfxResourceInternal uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_COUNT];
bool firstExecution;
- bool refreshPipelineStates;
uint32_t resourceFrameIndex;
float previousJitterOffset[2];
int32_t jitterPhaseCountRemaining;
diff --git a/thirdparty/amd-ffx/ffx_fsr3.cpp b/thirdparty/amd-ffx/ffx_fsr3.cpp
new file mode 100644
index 000000000000..01aaac7ca6fc
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_fsr3.cpp
@@ -0,0 +1,589 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <algorithm> // for max used inside SPD CPU code.
+#include <cmath> // for fabs, abs, sinf, sqrt, etc.
+#include <string.h> // for memset
+#include <float.h> // for FLT_EPSILON
+#include <wchar.h> // for swprintf
+#include "ffx_fsr3.h"
+#include "ffx_fsr3upscaler.h"
+#define FFX_CPU
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-function"
+#pragma clang diagnostic ignored "-Wsign-compare"
+#endif
+
+#include "gpu/ffx_core.h"
+#include "gpu/fsr3/ffx_fsr3_resources.h"
+#include "ffx_object_management.h"
+#include "ffx_frameinterpolation_private.h"
+
+#include "ffx_fsr3_private.h"
+
+// Tracks the single FSR3 context that currently has frame generation enabled.
+// ffxFsr3ConfigureFrameGeneration sets it when frame generation is switched on
+// (asserting that no other context is registered) and clears it when switched off;
+// ffxFsr3ContextDestroy also clears it. ffxFsr3DispatchFrameGeneration receives no
+// context argument and reads the context from here.
+static FfxFsr3Context* s_Context = nullptr;
+
+/// Creates an FSR3 context.
+///
+/// Validates the backend interfaces supplied in the description, creates the
+/// shared-resources backend context, then creates the upscaler context (unless
+/// FFX_FSR3_ENABLE_INTERPOLATION_ONLY is set) and the optical-flow and
+/// frame-interpolation contexts (unless FFX_FSR3_ENABLE_UPSCALING_ONLY is set),
+/// and finally allocates the resources shared between those effects.
+///
+/// @param [out] context             Storage for the context; zeroed before use.
+/// @param [in]  contextDescription  Creation parameters and backend interfaces.
+///
+/// @retval FFX_OK                          The context was created.
+/// @retval FFX_ERROR_INVALID_POINTER       @p context or @p contextDescription was null.
+/// @retval FFX_ERROR_INCOMPLETE_INTERFACE  A required backend callback was missing.
+///
+/// Any error reported by a backend or sub-effect creation call is returned as-is
+/// through FFX_VALIDATE.
+FfxErrorCode ffxFsr3ContextCreate(FfxFsr3Context* context, FfxFsr3ContextDescription* contextDescription)
+{
+    FFX_STATIC_ASSERT(sizeof(FfxFsr3Context) >= sizeof(FfxFsr3Context_Private));
+    FfxErrorCode ret = FFX_OK;
+
+    // The context pointer must be validated before it is written through.
+    FFX_RETURN_ON_ERROR(
+        context,
+        FFX_ERROR_INVALID_POINTER);
+
+    FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context);
+
+    // Prepare backend
+    memset(context, 0, sizeof(FfxFsr3Context_Private));
+
+    FFX_RETURN_ON_ERROR(
+        contextDescription,
+        FFX_ERROR_INVALID_POINTER);
+
+    contextPrivate->description = *contextDescription;
+
+    contextPrivate->backendInterfaceSharedResources    = contextDescription->backendInterfaceSharedResources;
+    contextPrivate->backendInterfaceUpscaling          = contextDescription->backendInterfaceUpscaling;
+    contextPrivate->backendInterfaceFrameInterpolation = contextDescription->backendInterfaceFrameInterpolation;
+
+    bool upscalingOnly     = (contextDescription->flags & FFX_FSR3_ENABLE_UPSCALING_ONLY) != 0;
+    bool interpolationOnly = (contextDescription->flags & FFX_FSR3_ENABLE_INTERPOLATION_ONLY) != 0;
+    contextPrivate->asyncWorkloadSupported = (contextDescription->flags & FFX_FSR3_ENABLE_ASYNC_WORKLOAD_SUPPORT) != 0;
+    // With async workloads several frames can be in flight, so each needs its own set of shared resources.
+    contextPrivate->sharedResourceCount    = contextPrivate->asyncWorkloadSupported ? FSR3_MAX_QUEUED_FRAMES : 1;
+
+    // ensure upscalingOnly and interpolationOnly are not set simultaneously
+    FFX_ASSERT(upscalingOnly == false || interpolationOnly == false);
+
+    // NOTE(review): contextPrivate->interpolationOnly and contextPrivate->upscalingOnly are read
+    // below but are not assigned anywhere in this function after the memset above, so they are
+    // always false here. Confirm whether they are meant to mirror the local flags.
+
+    // validate that all callbacks are set for the backend interfaces
+    if (contextPrivate->interpolationOnly)
+    {
+        const FfxUInt32 numBackendsToVerify = 2;
+        FfxInterface*   backendsToVerify[]  = { &contextPrivate->backendInterfaceSharedResources,
+                                                &contextPrivate->backendInterfaceFrameInterpolation };
+
+        for (FfxUInt32 i = 0; i < numBackendsToVerify; i++)
+        {
+            FfxInterface* backend = backendsToVerify[i];
+            FFX_RETURN_ON_ERROR(backend, FFX_ERROR_INCOMPLETE_INTERFACE);
+            FFX_RETURN_ON_ERROR(backend->fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE);
+            FFX_RETURN_ON_ERROR(backend->fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+            FFX_RETURN_ON_ERROR(backend->fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+
+            // if a scratch buffer is declared, then we must have a size
+            if (backend->scratchBuffer)
+            {
+                FFX_RETURN_ON_ERROR(backend->scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE);
+            }
+        }
+    }
+    else
+    {
+        const FfxUInt32 numBackendsToVerify = contextPrivate->upscalingOnly ? 1 : 3;
+        FfxInterface*   backendsToVerify[]  = { &contextPrivate->backendInterfaceUpscaling,
+                                                &contextPrivate->backendInterfaceSharedResources,
+                                                &contextPrivate->backendInterfaceFrameInterpolation };
+
+        for (FfxUInt32 i = 0; i < numBackendsToVerify; i++)
+        {
+            FfxInterface* backend = backendsToVerify[i];
+            FFX_RETURN_ON_ERROR(backend, FFX_ERROR_INCOMPLETE_INTERFACE);
+            FFX_RETURN_ON_ERROR(backend->fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE);
+            FFX_RETURN_ON_ERROR(backend->fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE);
+            FFX_RETURN_ON_ERROR(backend->fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+            FFX_RETURN_ON_ERROR(backend->fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+
+            // if a scratch buffer is declared, then we must have a size
+            if (backend->scratchBuffer)
+            {
+                FFX_RETURN_ON_ERROR(backend->scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE);
+            }
+        }
+    }
+
+    if (!contextPrivate->upscalingOnly)
+    {
+        FFX_VALIDATE(contextPrivate->backendInterfaceSharedResources.fpCreateBackendContext(&contextPrivate->backendInterfaceSharedResources,
+                                                                                            FFX_EFFECT_SHAREDRESOURCES,
+                                                                                            nullptr,
+                                                                                            &contextPrivate->effectContextIdSharedResources));
+    }
+    else
+    {
+        // Upscaling only: the shared resources live in the upscaling backend.
+        contextPrivate->backendInterfaceSharedResources     = contextPrivate->backendInterfaceUpscaling;
+        contextDescription->backendInterfaceSharedResources = contextDescription->backendInterfaceUpscaling;
+    }
+
+    // set up FSR3 Upscaler
+    // ensure we're actually creating an FSR3 Upscaler context, not the creationfunction that reroutes to ffxFsr3ContextCreate
+    if (!interpolationOnly)
+    {
+        FfxFsr3UpscalerContextDescription upDesc = {};
+        upDesc.flags = 0;
+        upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_HIGH_DYNAMIC_RANGE) ? FFX_FSR3UPSCALER_ENABLE_HIGH_DYNAMIC_RANGE : 0;
+        upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) ? FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS : 0;
+        upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) ? FFX_FSR3UPSCALER_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION : 0;
+        upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DEPTH_INVERTED) ? FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED : 0;
+        upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DEPTH_INFINITE) ? FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE : 0;
+        upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_AUTO_EXPOSURE) ? FFX_FSR3UPSCALER_ENABLE_AUTO_EXPOSURE : 0;
+        upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DYNAMIC_RESOLUTION) ? FFX_FSR3UPSCALER_ENABLE_DYNAMIC_RESOLUTION : 0;
+        upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DEBUG_CHECKING) ? FFX_FSR3UPSCALER_ENABLE_DEBUG_CHECKING : 0;
+        upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_HDR_UPSCALE_SDR_FINALOUTPUT) ? FFX_FSR3UPSCALER_ENABLE_HIGH_DYNAMIC_RANGE : 0;
+        upDesc.maxRenderSize    = contextDescription->maxRenderSize;
+        upDesc.maxUpscaleSize   = contextDescription->maxUpscaleSize;
+        upDesc.backendInterface = contextDescription->backendInterfaceUpscaling;
+        upDesc.fpMessage        = contextDescription->fpMessage;
+        FFX_VALIDATE(ffxFsr3UpscalerContextCreate(&contextPrivate->upscalerContext, &upDesc));
+    }
+
+    if (!upscalingOnly)
+    {
+        FfxOpticalflowContextDescription ofDescription = {};
+        ofDescription.backendInterface = contextDescription->backendInterfaceFrameInterpolation;
+        ofDescription.resolution       = contextDescription->displaySize;
+
+        // set up Opticalflow
+        FFX_VALIDATE(ffxOpticalflowContextCreate(&contextPrivate->ofContext, &ofDescription));
+
+        FfxFrameInterpolationContextDescription fiDescription = {};
+        fiDescription.backendInterface = contextDescription->backendInterfaceFrameInterpolation;
+        fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) ? FFX_FRAMEINTERPOLATION_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS : 0;
+        fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) ? FFX_FRAMEINTERPOLATION_ENABLE_JITTER_MOTION_VECTORS : 0;
+        fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DEPTH_INVERTED) ? FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED : 0;
+        fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DEPTH_INFINITE) ? FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INFINITE : 0;
+        fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_HIGH_DYNAMIC_RANGE) ? FFX_FRAMEINTERPOLATION_ENABLE_HDR_COLOR_INPUT : 0;
+        fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_SDR_UPSCALE_HDR_FINALOUTPUT) ? FFX_FRAMEINTERPOLATION_ENABLE_HDR_COLOR_INPUT : 0;
+        fiDescription.flags |= contextPrivate->asyncWorkloadSupported ? FFX_FRAMEINTERPOLATION_ENABLE_ASYNC_SUPPORT : 0;
+        fiDescription.maxRenderSize    = contextDescription->maxRenderSize;
+        fiDescription.displaySize      = contextDescription->displaySize;
+        fiDescription.backBufferFormat = contextDescription->backBufferFormat;
+        // This is a new item exposed only through ffx API on PC
+        fiDescription.previousInterpolationSourceFormat = contextDescription->backBufferFormat;
+
+        // set up Frameinterpolation
+        FFX_VALIDATE(ffxFrameInterpolationContextCreate(&contextPrivate->fiContext, &fiDescription));
+        contextPrivate->effectContextIdFrameGeneration = reinterpret_cast<FfxFrameInterpolationContext_Private*>(&contextPrivate->fiContext)->effectContextId;
+
+        // set up optical flow resources
+        FfxOpticalflowSharedResourceDescriptions ofResourceDescs = {};
+        FFX_VALIDATE(ffxOpticalflowGetSharedResourceDescriptions(&contextPrivate->ofContext, &ofResourceDescs));
+
+        FFX_VALIDATE(contextDescription->backendInterfaceSharedResources.fpCreateResource(
+            &contextDescription->backendInterfaceSharedResources, &ofResourceDescs.opticalFlowVector, contextPrivate->effectContextIdSharedResources, &contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR]));
+        FFX_VALIDATE(contextDescription->backendInterfaceSharedResources.fpCreateResource(
+            &contextDescription->backendInterfaceSharedResources, &ofResourceDescs.opticalFlowSCD, contextPrivate->effectContextIdSharedResources, &contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT]));
+    }
+
+    // set up FSR3Upscaler resources
+    if (!contextPrivate->interpolationOnly)
+    {
+        FfxFsr3UpscalerSharedResourceDescriptions fs3UpscalerResourceDescs = {};
+        FFX_VALIDATE(ffxFsr3UpscalerGetSharedResourceDescriptions(&contextPrivate->upscalerContext, &fs3UpscalerResourceDescs));
+
+        // One set of shared resources per queued frame; each resource name gets the set index appended.
+        // The names are wide strings, so the conversion must be %ls: in the wide printf family a plain
+        // %s denotes a narrow (char) string on conforming C libraries.
+        wchar_t Name[256] = {};
+        for (FfxUInt32 i = 0; i < contextPrivate->sharedResourceCount; i++)
+        {
+            FfxCreateResourceDescription dilD = fs3UpscalerResourceDescs.dilatedDepth;
+            swprintf(Name, 255, L"%ls%u", fs3UpscalerResourceDescs.dilatedDepth.name, i);
+            dilD.name = Name;
+            FFX_VALIDATE(contextDescription->backendInterfaceSharedResources.fpCreateResource(
+                &contextDescription->backendInterfaceSharedResources, &dilD, contextPrivate->effectContextIdSharedResources, &contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_0 + (i * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)]));
+
+            FfxCreateResourceDescription dilMVs = fs3UpscalerResourceDescs.dilatedMotionVectors;
+            swprintf(Name, 255, L"%ls%u", fs3UpscalerResourceDescs.dilatedMotionVectors.name, i);
+            dilMVs.name = Name;
+            FFX_VALIDATE(contextDescription->backendInterfaceSharedResources.fpCreateResource(
+                &contextDescription->backendInterfaceSharedResources, &dilMVs, contextPrivate->effectContextIdSharedResources, &contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_0 + (i * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)]));
+
+            FfxCreateResourceDescription recND = fs3UpscalerResourceDescs.reconstructedPrevNearestDepth;
+            swprintf(Name, 255, L"%ls%u", fs3UpscalerResourceDescs.reconstructedPrevNearestDepth.name, i);
+            recND.name = Name;
+            FFX_VALIDATE(contextDescription->backendInterfaceSharedResources.fpCreateResource(
+                &contextDescription->backendInterfaceSharedResources, &recND, contextPrivate->effectContextIdSharedResources, &contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_0 + (i * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)]));
+        }
+    }
+
+    return ret;
+}
+
+/// Queries the GPU memory usage of the effects owned by an FSR3 context.
+///
+/// Each output pointer is optional. A non-null output is zeroed first and then
+/// filled in by the matching sub-effect query; the return values of those
+/// queries are not inspected.
+///
+/// @param [in]  context                The FSR3 context to query.
+/// @param [out] pUpscalerUsage         Optional: usage of the upscaler context.
+/// @param [out] pOpticalFlowUsage      Optional: usage of the optical-flow context.
+/// @param [out] pFrameGenerationUsage  Optional: usage of the frame-interpolation context.
+///
+/// @retval FFX_OK                     The query completed.
+/// @retval FFX_ERROR_INVALID_POINTER  @p context was null.
+FfxErrorCode ffxFsr3ContextGetGpuMemoryUsage(
+    FfxFsr3Context* context,
+    FfxEffectMemoryUsage* pUpscalerUsage,
+    FfxEffectMemoryUsage* pOpticalFlowUsage,
+    FfxEffectMemoryUsage* pFrameGenerationUsage)
+{
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+
+    FfxFsr3Context_Private* self = (FfxFsr3Context_Private*)(context);
+
+    if (pUpscalerUsage != nullptr)
+    {
+        memset(pUpscalerUsage, 0, sizeof(*pUpscalerUsage));
+        ffxFsr3UpscalerContextGetGpuMemoryUsage(&self->upscalerContext, pUpscalerUsage);
+    }
+
+    if (pOpticalFlowUsage != nullptr)
+    {
+        memset(pOpticalFlowUsage, 0, sizeof(*pOpticalFlowUsage));
+        ffxOpticalflowContextGetGpuMemoryUsage(&self->ofContext, pOpticalFlowUsage);
+    }
+
+    if (pFrameGenerationUsage != nullptr)
+    {
+        memset(pFrameGenerationUsage, 0, sizeof(*pFrameGenerationUsage));
+        ffxFrameInterpolationContextGetGpuMemoryUsage(&self->fiContext, pFrameGenerationUsage);
+    }
+
+    return FFX_OK;
+}
+
+/// Generates a reactive mask by forwarding the request to the FSR3 upscaler.
+///
+/// The FSR3 description is copied field by field into the upscaler's
+/// description type and handed to ffxFsr3UpscalerContextGenerateReactiveMask.
+///
+/// @param [in] context  The FSR3 context whose upscaler context is used.
+/// @param [in] params   Inputs, output resource and tuning values for the pass.
+///
+/// @retval FFX_ERROR_INVALID_POINTER  @p context or @p params was null.
+/// @return Otherwise, the result of the upscaler call.
+FfxErrorCode ffxFsr3ContextGenerateReactiveMask(FfxFsr3Context* context, const FfxFsr3GenerateReactiveDescription* params)
+{
+    // Both pointers are dereferenced below; reject null like the other entry points do.
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(params, FFX_ERROR_INVALID_POINTER);
+
+    FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context);
+
+    FfxFsr3UpscalerGenerateReactiveDescription fsr3Params{};
+
+    fsr3Params.commandList     = params->commandList;
+    fsr3Params.colorOpaqueOnly = params->colorOpaqueOnly;
+    fsr3Params.colorPreUpscale = params->colorPreUpscale;
+    fsr3Params.outReactive     = params->outReactive;
+    fsr3Params.renderSize      = params->renderSize;
+    fsr3Params.scale           = params->scale;
+    fsr3Params.cutoffThreshold = params->cutoffThreshold;
+    fsr3Params.binaryValue     = params->binaryValue;
+    fsr3Params.flags           = params->flags;
+
+    return ffxFsr3UpscalerContextGenerateReactiveMask(&contextPrivate->upscalerContext, &fsr3Params);
+}
+
+/// Frame-generation dispatch callback: runs optical flow and then frame
+/// interpolation for the context registered in s_Context.
+///
+/// @param [in] callbackDesc  Per-frame dispatch parameters (command list, present
+///                           colour, outputs, interpolation rectangle, frame ID, ...).
+///
+/// @retval FFX_ERROR_INVALID_POINTER  No context is registered or @p callbackDesc was null.
+/// @return Otherwise, the bitwise OR of the optical-flow and frame-interpolation results.
+///
+/// NOTE(review): OR-ing two error codes can produce a value that matches neither
+/// of them if both dispatches fail with different codes.
+FfxErrorCode ffxFsr3DispatchFrameGeneration(const FfxFrameGenerationDispatchDescription* callbackDesc)
+{
+ FfxErrorCode errorCode = FFX_OK;
+
+ FFX_RETURN_ON_ERROR(s_Context, FFX_ERROR_INVALID_POINTER);
+ FFX_RETURN_ON_ERROR(callbackDesc, FFX_ERROR_INVALID_POINTER);
+
+ FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(s_Context);
+
+ bool upscalingOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_UPSCALING_ONLY) != 0;
+ FFX_ASSERT_MESSAGE(upscalingOnly == false, "Fsr3 context has not been initialized to support Frame Generation");
+
+ // Two prepare descriptions are kept; the low bit of the frame ID selects the one stored for this frame.
+ const FfxFrameInterpolationPrepareDescription* prepareDesc = &contextPrivate->fgPrepareDescriptions[callbackDesc->frameID & 1];
+
+ // Optical flow
+ {
+ FfxOpticalflowDispatchDescription ofDispatchDesc{};
+ ofDispatchDesc.commandList = callbackDesc->commandList;
+ ofDispatchDesc.color = callbackDesc->presentColor;
+ // Use the HUD-less colour as optical flow input when one has been configured.
+ if (contextPrivate->HUDLess_color.resource)
+ {
+ ofDispatchDesc.color = contextPrivate->HUDLess_color;
+ }
+ ofDispatchDesc.reset = callbackDesc->reset;
+ ofDispatchDesc.backbufferTransferFunction = callbackDesc->backBufferTransferFunction;
+ ofDispatchDesc.minMaxLuminance.x = callbackDesc->minMaxLuminance[0];
+ ofDispatchDesc.minMaxLuminance.y = callbackDesc->minMaxLuminance[1];
+ ofDispatchDesc.opticalFlowVector = contextPrivate->backendInterfaceSharedResources.fpGetResource(&contextPrivate->backendInterfaceSharedResources, contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR]);
+ ofDispatchDesc.opticalFlowSCD = contextPrivate->backendInterfaceSharedResources.fpGetResource(&contextPrivate->backendInterfaceSharedResources, contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT]);
+
+ errorCode |= ffxOpticalflowContextDispatch(&contextPrivate->ofContext, &ofDispatchDesc);
+ }
+
+ // Frame interpolation
+ {
+ FfxFrameInterpolationDispatchDescription fiDispatchDesc{0};
+
+ // don't dispatch interpolation async for now: use the same commandlist for copy and interpolate
+ fiDispatchDesc.commandList = callbackDesc->commandList;
+ // The display size is taken from the present colour resource's description.
+ fiDispatchDesc.displaySize.width = callbackDesc->presentColor.description.width;
+ fiDispatchDesc.displaySize.height = callbackDesc->presentColor.description.height;
+ fiDispatchDesc.currentBackBuffer = callbackDesc->presentColor;
+ fiDispatchDesc.currentBackBuffer_HUDLess = contextPrivate->HUDLess_color;
+
+ fiDispatchDesc.renderSize = prepareDesc->renderSize;
+ // Only the first entry of the outputs array is consumed here.
+ fiDispatchDesc.output = callbackDesc->outputs[0];
+ fiDispatchDesc.opticalFlowVector = contextPrivate->backendInterfaceSharedResources.fpGetResource(&contextPrivate->backendInterfaceSharedResources, contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR]);
+ fiDispatchDesc.opticalFlowSceneChangeDetection = contextPrivate->backendInterfaceSharedResources.fpGetResource(&contextPrivate->backendInterfaceSharedResources, contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT]);
+ fiDispatchDesc.opticalFlowBlockSize = 8;
+ // Scale is the reciprocal of the display size in each dimension.
+ fiDispatchDesc.opticalFlowScale = { 1.f / fiDispatchDesc.displaySize.width, 1.f / fiDispatchDesc.displaySize.height };
+ fiDispatchDesc.frameTimeDelta = prepareDesc->frameTimeDelta;
+ fiDispatchDesc.reset = callbackDesc->reset;
+ fiDispatchDesc.cameraNear = prepareDesc->cameraNear;
+ fiDispatchDesc.cameraFar = prepareDesc->cameraFar;
+ fiDispatchDesc.viewSpaceToMetersFactor = prepareDesc->viewSpaceToMetersFactor;
+ fiDispatchDesc.cameraFovAngleVertical = prepareDesc->cameraFovAngleVertical;
+ fiDispatchDesc.interpolationRect.left = callbackDesc->interpolationRect.left;
+ fiDispatchDesc.interpolationRect.top = callbackDesc->interpolationRect.top;
+ fiDispatchDesc.interpolationRect.width = callbackDesc->interpolationRect.width;
+ fiDispatchDesc.interpolationRect.height = callbackDesc->interpolationRect.height;
+ fiDispatchDesc.frameID = callbackDesc->frameID;
+
+ // use the same surfaces that were specified in the upscale (or interpolation prepare)
+ fiDispatchDesc.dilatedDepth = contextPrivate->dilatedDepth;
+ fiDispatchDesc.dilatedMotionVectors = contextPrivate->dilatedMotionVectors;
+ fiDispatchDesc.reconstructedPrevDepth = contextPrivate->reconstructedPrevNearestDepth;
+
+ // Translate the FSR3 debug flags stored by ffxFsr3ConfigureFrameGeneration into frame-interpolation dispatch flags.
+ if (contextPrivate->frameGenerationFlags & FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_TEAR_LINES)
+ {
+ fiDispatchDesc.flags |= FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES;
+ }
+
+ if (contextPrivate->frameGenerationFlags & FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_VIEW)
+ {
+ fiDispatchDesc.flags |= FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW;
+ }
+
+ fiDispatchDesc.backBufferTransferFunction = callbackDesc->backBufferTransferFunction;
+ fiDispatchDesc.minMaxLuminance[0] = callbackDesc->minMaxLuminance[0];
+ fiDispatchDesc.minMaxLuminance[1] = callbackDesc->minMaxLuminance[1];
+
+ errorCode |= ffxFrameInterpolationDispatch(&contextPrivate->fiContext, &fiDispatchDesc);
+ }
+
+ return errorCode;
+}
+
+/// Dispatches the FSR3 upscaling pass for one frame.
+///
+/// Selects the set of shared resources (dilated depth, dilated motion vectors,
+/// reconstructed previous nearest depth) for this frame, stores them in the
+/// context, and forwards the dispatch to the FSR3 upscaler.
+///
+/// @param [in] context         The FSR3 context; must not have been created as interpolation-only.
+/// @param [in] dispatchParams  Per-frame inputs, output and camera/jitter parameters.
+///
+/// @retval FFX_ERROR_INVALID_POINTER  @p context or @p dispatchParams was null.
+/// @return Otherwise, the result of ffxFsr3UpscalerContextDispatch.
+FfxErrorCode ffxFsr3ContextDispatchUpscale(FfxFsr3Context* context, const FfxFsr3DispatchUpscaleDescription* dispatchParams)
+{
+    FfxErrorCode ret = FFX_OK;
+
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(dispatchParams, FFX_ERROR_INVALID_POINTER);
+
+    FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context);
+
+    bool interpolationOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_INTERPOLATION_ONLY) != 0;
+    // An interpolation-only context has no upscaler context to dispatch to.
+    FFX_ASSERT_MESSAGE(interpolationOnly == false, "Fsr3 context has not been initialized to support Upscaling");
+
+    // frameTimeDelta is divided by 1000 and the result clamped to [0, 1].
+    contextPrivate->deltaTime = FFX_MAXIMUM(0.0f, FFX_MINIMUM(1.0f, dispatchParams->frameTimeDelta / 1000.0f));
+
+    // Pick this frame's set of shared resources; sets are laid out back to back in sharedResources.
+    const FfxUInt32 sharedResourceIndexUpscaling = dispatchParams->frameID % contextPrivate->sharedResourceCount;
+    const FfxUInt32 sharedResourceOffset         = sharedResourceIndexUpscaling * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT;
+
+    contextPrivate->dilatedDepth = contextPrivate->backendInterfaceSharedResources.fpGetResource(
+        &contextPrivate->backendInterfaceSharedResources,
+        contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_0 + sharedResourceOffset]);
+    contextPrivate->dilatedMotionVectors = contextPrivate->backendInterfaceSharedResources.fpGetResource(
+        &contextPrivate->backendInterfaceSharedResources,
+        contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_0 + sharedResourceOffset]);
+    contextPrivate->reconstructedPrevNearestDepth = contextPrivate->backendInterfaceSharedResources.fpGetResource(
+        &contextPrivate->backendInterfaceSharedResources,
+        contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_0 + sharedResourceOffset]);
+
+    // dispatch FSR3
+    FfxFsr3UpscalerDispatchDescription fsr3DispatchParams{};
+    fsr3DispatchParams.commandList                   = dispatchParams->commandList;
+    fsr3DispatchParams.color                         = dispatchParams->color;
+    fsr3DispatchParams.depth                         = dispatchParams->depth;
+    fsr3DispatchParams.motionVectors                 = dispatchParams->motionVectors;
+    fsr3DispatchParams.exposure                      = dispatchParams->exposure;
+    fsr3DispatchParams.reactive                      = dispatchParams->reactive;
+    fsr3DispatchParams.transparencyAndComposition    = dispatchParams->transparencyAndComposition;
+    fsr3DispatchParams.output                        = dispatchParams->upscaleOutput;
+    fsr3DispatchParams.jitterOffset                  = dispatchParams->jitterOffset;
+    fsr3DispatchParams.motionVectorScale             = dispatchParams->motionVectorScale;
+    fsr3DispatchParams.renderSize                    = dispatchParams->renderSize;
+    fsr3DispatchParams.enableSharpening              = dispatchParams->enableSharpening;
+    fsr3DispatchParams.sharpness                     = dispatchParams->sharpness;
+    fsr3DispatchParams.frameTimeDelta                = dispatchParams->frameTimeDelta;
+    fsr3DispatchParams.preExposure                   = dispatchParams->preExposure;
+    fsr3DispatchParams.reset                         = dispatchParams->reset;
+    fsr3DispatchParams.cameraNear                    = dispatchParams->cameraNear;
+    fsr3DispatchParams.cameraFar                     = dispatchParams->cameraFar;
+    fsr3DispatchParams.cameraFovAngleVertical        = dispatchParams->cameraFovAngleVertical;
+    fsr3DispatchParams.viewSpaceToMetersFactor       = dispatchParams->viewSpaceToMetersFactor;
+    fsr3DispatchParams.dilatedDepth                  = contextPrivate->dilatedDepth;
+    fsr3DispatchParams.dilatedMotionVectors          = contextPrivate->dilatedMotionVectors;
+    fsr3DispatchParams.reconstructedPrevNearestDepth = contextPrivate->reconstructedPrevNearestDepth;
+
+    if (dispatchParams->flags & FFX_FSR3_UPSCALER_FLAG_DRAW_DEBUG_VIEW)
+    {
+        fsr3DispatchParams.flags |= FFX_FSR3UPSCALER_DISPATCH_DRAW_DEBUG_VIEW;
+    }
+
+    ret = ffxFsr3UpscalerContextDispatch(&contextPrivate->upscalerContext, &fsr3DispatchParams);
+
+    return ret;
+}
+
+/// Records the per-frame data that the frame-generation dispatch will need.
+///
+/// Builds a frame-interpolation prepare description from @p dispatchParams,
+/// selects this frame's set of shared resources, and stores the description in
+/// one of two slots chosen by the low bit of the frame ID. The prepare pass
+/// itself is only executed for interpolation-only contexts.
+///
+/// @param [in] context         The FSR3 context; must not have been created as upscaling-only.
+/// @param [in] dispatchParams  Per-frame depth, motion vectors and camera parameters.
+///
+/// @retval FFX_ERROR_INVALID_POINTER  @p context or @p dispatchParams was null.
+/// @return Otherwise FFX_OK, or the result of ffxFrameInterpolationPrepare when it is run.
+FfxErrorCode ffxFsr3ContextDispatchFrameGenerationPrepare(FfxFsr3Context* context, const FfxFsr3DispatchFrameGenerationPrepareDescription* dispatchParams)
+{
+    FfxErrorCode ret = FFX_OK;
+
+    // Both pointers are dereferenced below; reject null like ffxFsr3ContextDispatchUpscale does.
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(dispatchParams, FFX_ERROR_INVALID_POINTER);
+
+    FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context);
+
+    bool upscalingOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_UPSCALING_ONLY) != 0;
+    FFX_ASSERT_MESSAGE(upscalingOnly == false, "Fsr3 context has not been initialized to support Frame Generation");
+
+    // if not interpolationOnly there's no need to execute prepare as prepared resources from upscale can be used
+    bool interpolationOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_INTERPOLATION_ONLY) != 0;
+
+    // Pick this frame's set of shared resources; sets are laid out back to back in sharedResources.
+    const FfxUInt32 sharedResourceIndexUpscaling = dispatchParams->frameID % contextPrivate->sharedResourceCount;
+    const FfxUInt32 sharedResourceOffset         = sharedResourceIndexUpscaling * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT;
+
+    FfxFrameInterpolationPrepareDescription fiPrepareParams = {0};
+    fiPrepareParams.commandList             = dispatchParams->commandList;
+    fiPrepareParams.renderSize              = dispatchParams->renderSize;
+    fiPrepareParams.depth                   = dispatchParams->depth;
+    fiPrepareParams.motionVectors           = dispatchParams->motionVectors;
+    fiPrepareParams.jitterOffset            = dispatchParams->jitterOffset;
+    fiPrepareParams.motionVectorScale       = dispatchParams->motionVectorScale;
+    fiPrepareParams.frameTimeDelta          = dispatchParams->frameTimeDelta;
+    fiPrepareParams.cameraNear              = dispatchParams->cameraNear;
+    fiPrepareParams.cameraFar               = dispatchParams->cameraFar;
+    fiPrepareParams.viewSpaceToMetersFactor = dispatchParams->viewSpaceToMetersFactor;
+    fiPrepareParams.cameraFovAngleVertical  = dispatchParams->cameraFovAngleVertical;
+    fiPrepareParams.frameID                 = dispatchParams->frameID;
+
+    contextPrivate->dilatedDepth = contextPrivate->backendInterfaceSharedResources.fpGetResource(
+        &contextPrivate->backendInterfaceSharedResources,
+        contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_0 + sharedResourceOffset]);
+    contextPrivate->dilatedMotionVectors = contextPrivate->backendInterfaceSharedResources.fpGetResource(
+        &contextPrivate->backendInterfaceSharedResources,
+        contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_0 + sharedResourceOffset]);
+    contextPrivate->reconstructedPrevNearestDepth = contextPrivate->backendInterfaceSharedResources.fpGetResource(
+        &contextPrivate->backendInterfaceSharedResources,
+        contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_0 + sharedResourceOffset]);
+
+    fiPrepareParams.dilatedDepth           = contextPrivate->dilatedDepth;
+    fiPrepareParams.dilatedMotionVectors   = contextPrivate->dilatedMotionVectors;
+    fiPrepareParams.reconstructedPrevDepth = contextPrivate->reconstructedPrevNearestDepth;
+
+    if (interpolationOnly)
+    {
+        ret = ffxFrameInterpolationPrepare(&contextPrivate->fiContext, &fiPrepareParams);
+    }
+
+    // Remembered for ffxFsr3DispatchFrameGeneration, which looks it up by the same frameID & 1 slot.
+    contextPrivate->fgPrepareDescriptions[dispatchParams->frameID & 1] = fiPrepareParams;
+
+    return ret;
+}
+
+/// Applies a frame-generation configuration to an FSR3 context.
+///
+/// Stores the configuration's flags and HUD-less colour resource in the
+/// context, derives the debug presentation options from the flags, registers or
+/// unregisters the context as the active frame-generation context when the
+/// enabled state changes, and forwards the patched configuration to the
+/// frame-interpolation backend's swap-chain configuration callback.
+///
+/// @param [in] context  The FSR3 context; must not have been created as upscaling-only.
+/// @param [in] config   The requested frame-generation configuration.
+///
+/// @retval FFX_ERROR_INVALID_POINTER  @p context or @p config was null.
+/// @return Otherwise, the result of fpSwapChainConfigureFrameGeneration.
+FfxErrorCode ffxFsr3ConfigureFrameGeneration(FfxFsr3Context* context, const FfxFrameGenerationConfig* config)
+{
+    // Both pointers are dereferenced below; an assert alone does not protect builds where asserts are compiled out.
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(config, FFX_ERROR_INVALID_POINTER);
+
+    FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context);
+
+    bool upscalingOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_UPSCALING_ONLY) != 0;
+    FFX_ASSERT_MESSAGE(upscalingOnly == false, "Fsr3 context has not been initialized to support Frame Generation");
+
+    FFX_ASSERT_MESSAGE(!contextPrivate->frameGenerationEnabled || !config->allowAsyncWorkloads || contextPrivate->asyncWorkloadSupported,
+                       "Illegal to allow async workload when context was created without FFX_FSR3_ENABLE_ASYNC_WORKLOAD_SUPPORT flag set.");
+
+    // Work on a copy so the caller's configuration is left untouched.
+    FfxFrameGenerationConfig patchedConfig = *config;
+
+    contextPrivate->frameGenerationFlags = patchedConfig.flags;
+    contextPrivate->HUDLess_color        = patchedConfig.HUDLessColor;
+
+    if (patchedConfig.flags & FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_VIEW)
+    {
+        patchedConfig.onlyPresentInterpolated = true;
+    }
+
+    if (patchedConfig.flags & FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_PACING_LINES)
+    {
+        patchedConfig.drawDebugPacingLines = true;
+    }
+
+    // Track the enabled state; only one context may be registered for frame generation at a time.
+    if (contextPrivate->frameGenerationEnabled != patchedConfig.frameGenerationEnabled)
+    {
+        contextPrivate->frameGenerationEnabled = patchedConfig.frameGenerationEnabled;
+
+        if (contextPrivate->frameGenerationEnabled)
+        {
+            FFX_ASSERT(nullptr == s_Context);
+            s_Context = context;
+        }
+        else if (s_Context == context)
+        {
+            s_Context = nullptr;
+        }
+    }
+
+    return contextPrivate->backendInterfaceFrameInterpolation.fpSwapChainConfigureFrameGeneration(&patchedConfig);
+}
+
+/// Destroys an FSR3 context and the resources and sub-contexts it owns.
+///
+/// Destroys every shared resource slot and the shared-resources backend
+/// context, then the frame-interpolation and optical-flow contexts (unless the
+/// context is upscaling-only) and the upscaler context (unless it is
+/// interpolation-only). If this context is the registered frame-generation
+/// context, the registration is cleared.
+///
+/// @param [in] context  The FSR3 context to destroy.
+///
+/// @retval FFX_OK                     Everything was destroyed.
+/// @retval FFX_ERROR_INVALID_POINTER  @p context was null.
+///
+/// Any error from a destroy call wrapped in FFX_VALIDATE is returned
+/// immediately, leaving the remaining steps undone.
+FfxErrorCode ffxFsr3ContextDestroy(FfxFsr3Context* context)
+{
+    // The context is dereferenced below; reject null like the other entry points do.
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+
+    FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context);
+
+    for (FfxUInt32 i = 0; i < FFX_FSR3_RESOURCE_IDENTIFIER_COUNT; i++)
+    {
+        FFX_VALIDATE(contextPrivate->backendInterfaceSharedResources.fpDestroyResource(&contextPrivate->backendInterfaceSharedResources, contextPrivate->sharedResources[i], contextPrivate->effectContextIdSharedResources));
+    }
+    contextPrivate->backendInterfaceSharedResources.fpDestroyBackendContext(&contextPrivate->backendInterfaceSharedResources, contextPrivate->effectContextIdSharedResources);
+
+    bool upscalingOnly     = (contextPrivate->description.flags & FFX_FSR3_ENABLE_UPSCALING_ONLY) != 0;
+    bool interpolationOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_INTERPOLATION_ONLY) != 0;
+
+    if (!upscalingOnly)
+    {
+        FFX_VALIDATE(ffxFrameInterpolationContextDestroy(&contextPrivate->fiContext));
+        FFX_VALIDATE(ffxOpticalflowContextDestroy(&contextPrivate->ofContext));
+    }
+
+    if (!interpolationOnly)
+    {
+        FFX_VALIDATE(ffxFsr3UpscalerContextDestroy(&contextPrivate->upscalerContext));
+    }
+
+    // Never leave a dangling pointer behind for ffxFsr3DispatchFrameGeneration.
+    if (s_Context == context)
+    {
+        s_Context = nullptr;
+    }
+
+    return FFX_OK;
+}
+
+/// Returns the upscale ratio for an FSR3 quality mode by converting the mode to
+/// the upscaler's quality-mode type and delegating to the FSR3 upscaler.
+float ffxFsr3GetUpscaleRatioFromQualityMode(FfxFsr3QualityMode qualityMode)
+{
+    const FfxFsr3UpscalerQualityMode upscalerMode = static_cast<FfxFsr3UpscalerQualityMode>(qualityMode);
+    return ffxFsr3UpscalerGetUpscaleRatioFromQualityMode(upscalerMode);
+}
+
+/// Computes the render resolution for a display resolution and FSR3 quality
+/// mode by delegating to the FSR3 upscaler; the outputs and the error code are
+/// whatever the upscaler function produces.
+FfxErrorCode ffxFsr3GetRenderResolutionFromQualityMode(
+    uint32_t* renderWidth, uint32_t* renderHeight, uint32_t displayWidth, uint32_t displayHeight, FfxFsr3QualityMode qualityMode)
+{
+    const FfxFsr3UpscalerQualityMode upscalerMode = static_cast<FfxFsr3UpscalerQualityMode>(qualityMode);
+    return ffxFsr3UpscalerGetRenderResolutionFromQualityMode(renderWidth, renderHeight, displayWidth, displayHeight, upscalerMode);
+}
+
+/// Returns the jitter phase count for the given render and display widths, as
+/// computed by the FSR3 upscaler.
+int32_t ffxFsr3GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth)
+{
+    const int32_t phaseCount = ffxFsr3UpscalerGetJitterPhaseCount(renderWidth, displayWidth);
+    return phaseCount;
+}
+
+/// Retrieves the jitter offset for a position in the jitter sequence by
+/// delegating to the FSR3 upscaler; outputs and error code come from that call.
+FfxErrorCode ffxFsr3GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount)
+{
+    const FfxErrorCode result = ffxFsr3UpscalerGetJitterOffset(outX, outY, index, phaseCount);
+    return result;
+}
+
+/// Reports whether a resource is null, using the FSR3 upscaler's definition.
+FFX_API bool ffxFsr3ResourceIsNull(FfxResource resource)
+{
+    const bool isNull = ffxFsr3UpscalerResourceIsNull(resource);
+    return isNull;
+}
+
+/// Returns the FSR3 effect version, packed from the major, minor and patch
+/// version macros with FFX_SDK_MAKE_VERSION.
+FFX_API FfxVersionNumber ffxFsr3GetEffectVersion()
+{
+    const FfxVersionNumber version = FFX_SDK_MAKE_VERSION(FFX_FSR3_VERSION_MAJOR, FFX_FSR3_VERSION_MINOR, FFX_FSR3_VERSION_PATCH);
+    return version;
+}
diff --git a/thirdparty/amd-ffx/ffx_fsr3.h b/thirdparty/amd-ffx/ffx_fsr3.h
new file mode 100644
index 000000000000..6b9d5472617e
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_fsr3.h
@@ -0,0 +1,540 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// @defgroup FSR3
+
+#pragma once
+
+// Include the interface for the backend of the FSR3 API.
+#include "ffx_interface.h"
+#include "ffx_fsr3upscaler.h"
+#include "ffx_frameinterpolation.h"
+#include "ffx_opticalflow.h"
+
+/// FidelityFX Super Resolution 3 major version.
+///
+/// @ingroup FSR3
+#define FFX_FSR3_VERSION_MAJOR (3)
+
+/// FidelityFX Super Resolution 3 minor version.
+///
+/// @ingroup FSR3
+#define FFX_FSR3_VERSION_MINOR (1)
+
+/// FidelityFX Super Resolution 3 patch version.
+///
+/// @ingroup FSR3
+#define FFX_FSR3_VERSION_PATCH (4)
+
+/// FidelityFX Super Resolution 3 context count
+///
+/// Defines the number of internal effect contexts required by FSR3 (+1 for proxy swapchain)
+///
+/// @ingroup FSR3
+#define FFX_FSR3_CONTEXT_COUNT (FFX_FSR3UPSCALER_CONTEXT_COUNT + FFX_OPTICALFLOW_CONTEXT_COUNT + FFX_FRAMEINTERPOLATION_CONTEXT_COUNT + 1)
+
+/// The size of the context specified in 32bit values.
+///
+/// @ingroup FSR3
+#define FFX_FSR3_CONTEXT_SIZE (FFX_FSR3UPSCALER_CONTEXT_SIZE + FFX_OPTICALFLOW_CONTEXT_SIZE + FFX_FRAMEINTERPOLATION_CONTEXT_SIZE + FFX_SDK_DEFAULT_CONTEXT_SIZE)
+
+#if defined(__cplusplus)
+extern "C" {
+#endif // #if defined(__cplusplus)
+
+///// An enumeration of all the passes which constitute the FSR3 algorithm.
+/////
+///// FSR3 is implemented as a composite of several compute passes each
+///// computing a key part of the final result. Each call to the
+///// FfxFsr3ScheduleGpuJobFunc callback function will
+///// correspond to a single pass included in FfxFsr3Pass. For a
+///// more comprehensive description of each pass, please refer to the FSR3
+///// reference documentation.
+/////
+///// Please note in some cases e.g.: FFX_FSR3_PASS_ACCUMULATE
+///// and FFX_FSR3_PASS_ACCUMULATE_SHARPEN either one pass or the
+///// other will be used (they are mutually exclusive). The choice of which will
+///// depend on the way the FfxFsr3Context is created and the
+///// precise contents of FfxFsr3DispatchParamters each time a call
+///// is made to ffxFsr3ContextDispatch.
+/////
+///// @ingroup FSR3
+//typedef enum FfxFsr3Pass
+//{
+// // no special FSR3 pipelines
+//
+// FFX_FSR3_PASS_COUNT ///< The number of passes performed by FSR3.
+//} FfxFsr3Pass;
+
+/// An enumeration of all the quality modes supported by FidelityFX Super
+/// Resolution 3 upscaling.
+///
+/// In order to provide a consistent user experience across multiple
+/// applications which implement FSR3, it is strongly recommended that the
+/// following preset scaling factors are made available through your
+/// application's user interface.
+///
+/// If your application does not expose the notion of preset scaling factors
+/// for upscaling algorithms (perhaps instead implementing a fixed ratio which
+/// is immutable) or implementing a more dynamic scaling scheme (such as
+/// dynamic resolution scaling), then there is no need to use these presets.
+///
+/// Please note that FFX_FSR3_QUALITY_MODE_ULTRA_PERFORMANCE is
+/// an optional mode which may introduce significant quality degradation in the
+/// final image. As such it is recommended that you evaluate the final results
+/// of using this scaling mode before deciding if you should include it in your
+/// application.
+///
+/// @ingroup FSR3
+typedef enum FfxFsr3QualityMode {
+
+ FFX_FSR3_QUALITY_MODE_QUALITY = 1, ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x.
+ FFX_FSR3_QUALITY_MODE_BALANCED = 2, ///< Perform upscaling with a per-dimension upscaling ratio of 1.7x.
+ FFX_FSR3_QUALITY_MODE_PERFORMANCE = 3, ///< Perform upscaling with a per-dimension upscaling ratio of 2.0x.
+ FFX_FSR3_QUALITY_MODE_ULTRA_PERFORMANCE = 4 ///< Perform upscaling with a per-dimension upscaling ratio of 3.0x.
+} FfxFsr3QualityMode;
+
+/// An enumeration of bit flags used when creating a
+/// FfxFsr3Context. See FfxFsr3ContextDescription.
+///
+/// @ingroup FSR3
+typedef enum FfxFsr3InitializationFlagBits {
+
+ FFX_FSR3_ENABLE_HIGH_DYNAMIC_RANGE = (1<<0), ///< A bit indicating if the input color data provided to all inputs is using a high-dynamic range.
+ FFX_FSR3_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS = (1<<1), ///< A bit indicating if the motion vectors are rendered at display resolution.
+ FFX_FSR3_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION = (1<<2), ///< A bit indicating that the motion vectors have the jittering pattern applied to them.
+ FFX_FSR3_ENABLE_DEPTH_INVERTED = (1<<3), ///< A bit indicating that the input depth buffer data provided is inverted [1..0].
+ FFX_FSR3_ENABLE_DEPTH_INFINITE = (1<<4), ///< A bit indicating that the input depth buffer data provided is using an infinite far plane.
+ FFX_FSR3_ENABLE_AUTO_EXPOSURE = (1<<5), ///< A bit indicating if automatic exposure should be applied to input color data.
+ FFX_FSR3_ENABLE_DYNAMIC_RESOLUTION = (1<<6), ///< A bit indicating that the application uses dynamic resolution scaling.
+ FFX_FSR3_ENABLE_TEXTURE1D_USAGE = (1<<7), ///< This value is deprecated, but remains in order to aid upgrading from older versions of FSR3.
+ FFX_FSR3_ENABLE_DEBUG_CHECKING = (1<<8), ///< A bit indicating that the runtime should check some API values and report issues.
+ FFX_FSR3_ENABLE_UPSCALING_ONLY = (1<<9), ///< A bit indicating that the context will only be used for upscaling.
+ FFX_FSR3_ENABLE_HDR_UPSCALE_SDR_FINALOUTPUT = (1<<10), ///< A bit indicating if the input color data provided to UPSCALE is using a high-dynamic range, final output SDR.
+ FFX_FSR3_ENABLE_SDR_UPSCALE_HDR_FINALOUTPUT = (1<<11), ///< A bit indicating if the input color data provided to UPSCALE is using SDR, final output is high-dynamic range.
+ FFX_FSR3_ENABLE_ASYNC_WORKLOAD_SUPPORT = (1<<12), ///< NOTE(review): undocumented; presumably a bit enabling support for asynchronous workloads -- confirm.
+ FFX_FSR3_ENABLE_INTERPOLATION_ONLY = (1<<13), ///< NOTE(review): undocumented; presumably a bit indicating that the context will only be used for frame interpolation -- confirm.
+} FfxFsr3InitializationFlagBits;
+
+typedef enum FfxFsr3FrameGenerationFlags
+{
+ FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_TEAR_LINES = FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES, ///< A bit indicating that the debug tear lines will be drawn to the interpolated output.
+ FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_VIEW = FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW, ///< A bit indicating that the interpolated output resource will contain debug views with relevant information.
+ FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_PACING_LINES = FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_PACING_LINES ///< A bit indicating that the debug pacing lines will be drawn to the generated output.
+} FfxFsr3FrameGenerationFlags;
+
+typedef enum FfxFsr3UpscalingFlags
+{
+ FFX_FSR3_UPSCALER_FLAG_DRAW_DEBUG_VIEW = FFX_FSR3UPSCALER_DISPATCH_DRAW_DEBUG_VIEW, ///< A bit indicating that the upscaled output resource will contain debug views with relevant information.
+} FfxFsr3UpscalingFlags;
+
+/// A structure encapsulating the parameters required to initialize FidelityFX
+/// Super Resolution 3 upscaling.
+///
+/// @ingroup FSR3
+typedef struct FfxFsr3ContextDescription {
+ uint32_t flags; ///< A collection of FfxFsr3InitializationFlagBits.
+ FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at.
+ FfxDimensions2D maxUpscaleSize; ///< The size of the presentation resolution targeted by the upscaling process.
+ FfxDimensions2D displaySize; ///< The size of the presentation resolution targeted by the frame interpolation process.
+ FfxInterface backendInterfaceSharedResources; ///< A set of pointers to the backend implementation for FidelityFX SDK (presumably the instance used for shared resources -- confirm).
+ FfxInterface backendInterfaceUpscaling; ///< A set of pointers to the backend implementation for FidelityFX SDK (presumably the instance used for upscaling -- confirm).
+ FfxInterface backendInterfaceFrameInterpolation; ///< A set of pointers to the backend implementation for FidelityFX SDK (presumably the instance used for frame interpolation -- confirm).
+ FfxFsr3UpscalerMessage fpMessage; ///< A pointer to a function that can receive messages from the runtime.
+ FfxSurfaceFormat backBufferFormat; ///< The format of the swapchain surface.
+
+} FfxFsr3ContextDescription;
+
+/// A structure encapsulating the parameters for dispatching the various passes
+/// of FidelityFX Super Resolution 3.
+///
+/// @ingroup FSR3
+typedef struct FfxFsr3DispatchUpscaleDescription {
+
+ FfxCommandList commandList; ///< The FfxCommandList to record FSR3 rendering commands into.
+ FfxResource color; ///< A FfxResource containing the color buffer for the current frame (at render resolution).
+ FfxResource depth; ///< A FfxResource containing 32bit depth values for the current frame (at render resolution).
+ FfxResource motionVectors; ///< A FfxResource containing 2-dimensional motion vectors (at render resolution if FFX_FSR3_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS is not set).
+ FfxResource exposure; ///< An optional FfxResource containing a 1x1 exposure value.
+ FfxResource reactive; ///< An optional FfxResource containing alpha value of reactive objects in the scene.
+ FfxResource transparencyAndComposition; ///< An optional FfxResource containing alpha value of special objects in the scene.
+ FfxResource upscaleOutput; ///< A FfxResource containing the output color buffer for the current frame (at presentation resolution).
+ FfxFloatCoords2D jitterOffset; ///< The subpixel jitter offset applied to the camera.
+ FfxFloatCoords2D motionVectorScale; ///< The scale factor to apply to motion vectors.
+ FfxDimensions2D renderSize; ///< The resolution that was used for rendering the input resources.
+ FfxDimensions2D upscaleSize; ///< The resolution that the upscaler will output.
+ bool enableSharpening; ///< Enable an additional sharpening pass.
+ float sharpness; ///< The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness.
+ float frameTimeDelta; ///< The time elapsed since the last frame (expressed in milliseconds).
+ float preExposure; ///< The pre exposure value (must be > 0.0f)
+ bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously.
+ float cameraNear; ///< The distance to the near plane of the camera.
+ float cameraFar; ///< The distance to the far plane of the camera. This is only used in case of non infinite depth.
+ float cameraFovAngleVertical; ///< The camera angle field of view in the vertical direction (expressed in radians).
+ float viewSpaceToMetersFactor; ///< The scale factor to convert view space units to meters
+ uint32_t flags; ///< combination of FfxFsr3UpscalingFlags
+ uint64_t frameID; ///< NOTE(review): undocumented; presumably an identifier of the frame being dispatched -- confirm.
+} FfxFsr3DispatchUpscaleDescription;
+
+typedef struct FfxFsr3DispatchFrameGenerationPrepareDescription
+{
+ FfxCommandList commandList; ///< The FfxCommandList to record FSR3 rendering commands into.
+ FfxResource depth; ///< A FfxResource containing 32bit depth values for the current frame (at render resolution).
+ FfxResource motionVectors; ///< A FfxResource containing 2-dimensional motion vectors (at render resolution if FFX_FSR3_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS is not set).
+ FfxFloatCoords2D jitterOffset; ///< The subpixel jitter offset applied to the camera.
+ FfxFloatCoords2D motionVectorScale; ///< The scale factor to apply to motion vectors.
+ FfxDimensions2D renderSize; ///< The resolution that was used for rendering the input resources.
+
+ float frameTimeDelta; ///< The time elapsed since the last frame (presumably in milliseconds, as in FfxFsr3DispatchUpscaleDescription -- confirm).
+ float cameraNear; ///< The distance to the near plane of the camera.
+ float cameraFar; ///< The distance to the far plane of the camera.
+ float viewSpaceToMetersFactor; ///< The scale factor to convert view space units to meters.
+ float cameraFovAngleVertical; ///< The camera angle field of view in the vertical direction (presumably in radians, as in FfxFsr3DispatchUpscaleDescription -- confirm).
+
+ uint64_t frameID; ///< NOTE(review): undocumented; presumably an identifier of the frame being prepared -- confirm.
+} FfxFsr3DispatchFrameGenerationPrepareDescription;
+
+FFX_API FfxErrorCode ffxFsr3DispatchFrameGeneration(const FfxFrameGenerationDispatchDescription* desc);
+
+/// A structure encapsulating the parameters for automatic generation of a reactive mask
+///
+/// @ingroup FSR3
+typedef struct FfxFsr3GenerateReactiveDescription {
+
+ FfxCommandList commandList; ///< The FfxCommandList to record FSR3 rendering commands into.
+ FfxResource colorOpaqueOnly; ///< A FfxResource containing the opaque only color buffer for the current frame (at render resolution).
+ FfxResource colorPreUpscale; ///< A FfxResource containing the opaque+translucent color buffer for the current frame (at render resolution).
+ FfxResource outReactive; ///< A FfxResource containing the surface to generate the reactive mask into.
+ FfxDimensions2D renderSize; ///< The resolution that was used for rendering the input resources.
+ float scale; ///< A value to scale the output
+ float cutoffThreshold; ///< A threshold value to generate a binary reactive mask
+ float binaryValue; ///< NOTE(review): undocumented; presumably the value written for the binary reactive mask -- confirm.
+ uint32_t flags; ///< Flags to determine how to generate the reactive mask
+} FfxFsr3GenerateReactiveDescription;
+
+/// A structure encapsulating the FidelityFX Super Resolution 3 context.
+///
+/// This sets up an object which contains all persistent internal data and
+/// resources that are required by FSR3.
+///
+/// The FfxFsr3Context object should have a lifetime matching
+/// your use of FSR3. Before destroying the FSR3 context care should be taken
+/// to ensure the GPU is not accessing the resources created or used by FSR3.
+/// It is therefore recommended that the GPU is idle before destroying the
+/// FSR3 context.
+///
+/// @ingroup FSR3
+typedef struct FfxFsr3Context
+{
+ uint32_t data[FFX_FSR3_CONTEXT_SIZE]; ///< An opaque set of uint32_t which contain the data for the context.
+} FfxFsr3Context;
+
+/// Create a FidelityFX Super Resolution 3 context from the parameters
+/// programmed to the FfxFsr3CreateParams structure.
+///
+/// The context structure is the main object used to interact with the FSR3
+/// API, and is responsible for the management of the internal resources used
+/// by the FSR3 algorithm. When this API is called, multiple calls will be
+/// made via the pointers contained in the callbacks structure.
+/// These callbacks will attempt to retrieve the device capabilities, and
+/// create the internal resources, and pipelines required by FSR3's
+/// frame-to-frame function. Depending on the precise configuration used when
+/// creating the FfxFsr3Context a different set of resources and
+/// pipelines might be requested via the callback functions.
+///
+/// The flags included in the flags field of
+/// FfxFsr3ContextDescription should match the configuration of your
+/// application as well as the intended use of FSR3. It is important that these
+/// flags are set correctly (as well as a correctly programmed
+/// FfxFsr3DispatchUpscaleDescription) to ensure correct operation. It is
+/// recommended to consult the overview documentation for further details on
+/// how FSR3 should be integrated into an application.
+///
+/// Once the FfxFsr3Context is created, you should use the
+/// ffxFsr3ContextDispatchUpscale function each frame where FSR3
+/// upscaling should be applied. See the documentation of
+/// ffxFsr3ContextDispatchUpscale for more details.
+///
+/// The FfxFsr3Context should be destroyed when use of it is
+/// completed, typically when an application is unloaded or FSR3 upscaling is
+/// disabled by a user. To destroy the FSR3 context you should call
+/// ffxFsr3ContextDestroy.
+///
+/// @param [out] context A pointer to a FfxFsr3Context structure to populate.
+/// @param [in] contextDescription A pointer to a FfxFsr3ContextDescription structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context or contextDescription was NULL.
+/// @retval
+/// FFX_ERROR_INCOMPLETE_INTERFACE The operation failed because the FfxFsr3ContextDescription.callbacks was not fully specified.
+/// @retval
+/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
+///
+/// @ingroup FSR3
+FFX_API FfxErrorCode ffxFsr3ContextCreate(FfxFsr3Context* context, FfxFsr3ContextDescription* contextDescription);
+
+FFX_API FfxErrorCode ffxFsr3ContextGetGpuMemoryUsage(FfxFsr3Context* pContext,
+ FfxEffectMemoryUsage* pUpscalerUsage,
+ FfxEffectMemoryUsage* pOpticalFlowUsage,
+ FfxEffectMemoryUsage* pFrameGenerationUsage);
+
+/// Dispatch the various passes that constitute FidelityFX Super Resolution 3 Upscaling.
+///
+/// FSR3 is a composite effect, meaning that it is comprised of multiple
+/// constituent passes (implemented as one or more clears, copies and compute
+/// dispatches). The ffxFsr3ContextDispatchUpscale function is the
+/// function which (via the use of the functions contained in the
+/// callbacks field of the FfxFsr3Context
+/// structure) ultimately generates the sequence of graphics API calls required
+/// each frame.
+///
+/// As with the creation of the FfxFsr3Context correctly
+/// programming the dispatchParams is key to ensuring
+/// the correct operation of FSR3. It is particularly important to ensure that
+/// camera jitter is correctly applied to your application's projection matrix
+/// (or camera origin for raytraced applications). FSR3 provides the
+/// ffxFsr3GetJitterPhaseCount and
+/// ffxFsr3GetJitterOffset entry points to help applications
+/// correctly compute the camera jitter. Whatever jitter pattern is used by the
+/// application it should be correctly programmed to the
+/// jitterOffset field of the dispatchParams
+/// structure. For more guidance on camera jitter please consult the
+/// documentation for ffxFsr3GetJitterOffset as well as the
+/// accompanying overview documentation for FSR3.
+///
+/// @param [in] context A pointer to a FfxFsr3Context structure.
+/// @param [in] dispatchParams A pointer to a FfxFsr3DispatchUpscaleDescription structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context or dispatchParams was NULL.
+/// @retval
+/// FFX_ERROR_OUT_OF_RANGE The operation failed because dispatchParams.renderSize was larger than the maximum render resolution.
+/// @retval
+/// FFX_ERROR_NULL_DEVICE The operation failed because the device inside the context was NULL.
+/// @retval
+/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
+///
+/// @ingroup FSR3
+FFX_API FfxErrorCode ffxFsr3ContextDispatchUpscale(FfxFsr3Context* context, const FfxFsr3DispatchUpscaleDescription* dispatchParams);
+FFX_API FfxErrorCode ffxFsr3ContextDispatchFrameGenerationPrepare(FfxFsr3Context* context, const FfxFsr3DispatchFrameGenerationPrepareDescription* dispatchParams);
+
+FFX_API FfxErrorCode ffxFsr3SkipPresent(FfxFsr3Context* context);
+
+/// A helper function to generate a reactive mask from an opaque-only texture and one containing translucent objects.
+///
+/// @param [in] context A pointer to a FfxFsr3Context structure.
+/// @param [in] params A pointer to a FfxFsr3GenerateReactiveDescription structure
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+///
+/// @ingroup FSR3
+FFX_API FfxErrorCode ffxFsr3ContextGenerateReactiveMask(FfxFsr3Context* context, const FfxFsr3GenerateReactiveDescription* params);
+
+FFX_API FfxErrorCode ffxFsr3ConfigureFrameGeneration(FfxFsr3Context* context, const FfxFrameGenerationConfig* config);
+
+/// Destroy the FidelityFX Super Resolution context.
+///
+/// @param [out] context A pointer to a FfxFsr3Context structure to destroy.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because context was NULL.
+///
+/// @ingroup FSR3
+FFX_API FfxErrorCode ffxFsr3ContextDestroy(FfxFsr3Context* context);
+
+/// Get the upscale ratio from the quality mode.
+///
+/// The following table enumerates the mapping of the quality modes to
+/// per-dimension scaling ratios.
+///
+/// Quality preset | Scale factor
+/// ----------------------------------------------------- | -------------
+/// FFX_FSR3_QUALITY_MODE_NATIVEAA | 1.0x
+/// FFX_FSR3_QUALITY_MODE_QUALITY | 1.5x
+/// FFX_FSR3_QUALITY_MODE_BALANCED | 1.7x
+/// FFX_FSR3_QUALITY_MODE_PERFORMANCE | 2.0x
+/// FFX_FSR3_QUALITY_MODE_ULTRA_PERFORMANCE | 3.0x
+///
+/// Passing an invalid qualityMode will return 0.0f.
+///
+/// @param [in] qualityMode The quality mode preset.
+///
+/// @returns
+/// The per-dimension upscaling ratio for
+/// qualityMode according to the table above.
+///
+/// @ingroup FSR3
+FFX_API float ffxFsr3GetUpscaleRatioFromQualityMode(FfxFsr3QualityMode qualityMode);
+
+/// A helper function to calculate the rendering resolution from a target
+/// resolution and desired quality level.
+///
+/// This function applies the scaling factor returned by
+/// ffxFsr3GetUpscaleRatioFromQualityMode to each dimension.
+///
+/// @param [out] renderWidth A pointer to a uint32_t which will hold the calculated render resolution width.
+/// @param [out] renderHeight A pointer to a uint32_t which will hold the calculated render resolution height.
+/// @param [in] displayWidth The target display resolution width.
+/// @param [in] displayHeight The target display resolution height.
+/// @param [in] qualityMode The desired quality mode for FSR 3 upscaling.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_INVALID_POINTER Either renderWidth or renderHeight was NULL.
+/// @retval
+/// FFX_ERROR_INVALID_ENUM An invalid quality mode was specified.
+///
+/// @ingroup FSR3
+FFX_API FfxErrorCode ffxFsr3GetRenderResolutionFromQualityMode(
+ uint32_t* renderWidth,
+ uint32_t* renderHeight,
+ uint32_t displayWidth,
+ uint32_t displayHeight,
+ FfxFsr3QualityMode qualityMode);
+
+/// A helper function to calculate the jitter phase count from display
+/// resolution.
+///
+/// For more detailed information about the application of camera jitter to
+/// your application's rendering please refer to the
+/// ffxFsr3GetJitterOffset function.
+///
+/// The table below shows the jitter phase count which this function
+/// would return for each of the quality presets.
+///
+/// Quality preset | Scale factor | Phase count
+/// ----------------------------------------------------- | ------------- | ---------------
+/// FFX_FSR3_QUALITY_MODE_QUALITY | 1.5x | 18
+/// FFX_FSR3_QUALITY_MODE_BALANCED | 1.7x | 23
+/// FFX_FSR3_QUALITY_MODE_PERFORMANCE | 2.0x | 32
+/// FFX_FSR3_QUALITY_MODE_ULTRA_PERFORMANCE | 3.0x | 72
+/// Custom | [1..n]x | ceil(8*n^2)
+///
+/// @param [in] renderWidth The render resolution width.
+/// @param [in] displayWidth The display resolution width.
+///
+/// @returns
+/// The jitter phase count for the scaling factor between renderWidth and displayWidth.
+///
+/// @ingroup FSR3
+FFX_API int32_t ffxFsr3GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth);
+
+/// A helper function to calculate the subpixel jitter offset.
+///
+/// FSR3 relies on the application to apply sub-pixel jittering while rendering.
+/// This is typically included in the projection matrix of the camera. To make
+/// the application of camera jitter simple, the FSR3 API provides a small set
+/// of utility function which computes the sub-pixel jitter offset for a
+/// particular frame within a sequence of separate jitter offsets. To begin, the
+/// index within the jitter phase must be computed. To calculate the
+/// sequence's length, you can call the ffxFsr3GetJitterPhaseCount
+/// function. The index should be a value which is incremented each frame modulo
+/// the length of the sequence computed by ffxFsr3GetJitterPhaseCount.
+/// The index within the jitter phase is passed to
+/// ffxFsr3GetJitterOffset via the index parameter.
+///
+/// This function uses a Halton(2,3) sequence to compute the jitter offset.
+/// The ultimate index used for the sequence is index %
+/// phaseCount.
+///
+/// It is important to understand that the values returned from the
+/// ffxFsr3GetJitterOffset function are in unit pixel space, and
+/// in order to composite this correctly into a projection matrix we must
+/// convert them into projection offsets. This is done as per the pseudo code
+/// listing which is shown below.
+///
+/// const int32_t jitterPhaseCount = ffxFsr3GetJitterPhaseCount(renderWidth, displayWidth);
+///
+/// float jitterX = 0;
+/// float jitterY = 0;
+/// ffxFsr3GetJitterOffset(&jitterX, &jitterY, index, jitterPhaseCount);
+///
+/// const float jitterX = 2.0f * jitterX / (float)renderWidth;
+/// const float jitterY = -2.0f * jitterY / (float)renderHeight;
+/// const Matrix4 jitterTranslationMatrix = translateMatrix(Matrix3::identity, Vector3(jitterX, jitterY, 0));
+/// const Matrix4 jitteredProjectionMatrix = jitterTranslationMatrix * projectionMatrix;
+///
+/// Jitter should be applied to all rendering. This includes opaque, alpha
+/// transparent, and raytraced objects. For rasterized objects, the sub-pixel
+/// jittering values calculated by the ffxFsr3GetJitterOffset
+/// function can be applied to the camera projection matrix which is ultimately
+/// used to perform transformations during vertex shading. For raytraced
+/// rendering, the sub-pixel jitter should be applied to the ray's origin,
+/// often the camera's position.
+///
+/// Whether you elect to use the ffxFsr3GetJitterOffset function
+/// or your own sequence generator, you must program the
+/// jitterOffset field of the
+/// FfxFsr3DispatchUpscaleDescription structure in order to inform FSR3
+/// of the jitter offset that has been applied in order to render each frame.
+///
+/// If not using the recommended ffxFsr3GetJitterOffset function,
+/// care should be taken that your jitter sequence never generates a null vector;
+/// that is value of 0 in both the X and Y dimensions.
+///
+/// @param [out] outX A pointer to a float which will contain the subpixel jitter offset for the x dimension.
+/// @param [out] outY A pointer to a float which will contain the subpixel jitter offset for the y dimension.
+/// @param [in] index The index within the jitter sequence.
+/// @param [in] phaseCount The length of jitter phase. See ffxFsr3GetJitterPhaseCount.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_INVALID_POINTER Either outX or outY was NULL.
+/// @retval
+/// FFX_ERROR_INVALID_ARGUMENT Argument phaseCount must be greater than 0.
+///
+/// @ingroup FSR3
+FFX_API FfxErrorCode ffxFsr3GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount);
+
+/// A helper function to check if a resource is
+/// FFX_FSR3_RESOURCE_IDENTIFIER_NULL.
+///
+/// @param [in] resource A FfxResource.
+///
+/// @returns
+/// true The resource was FFX_FSR3_RESOURCE_IDENTIFIER_NULL.
+/// @returns
+/// false The resource was not FFX_FSR3_RESOURCE_IDENTIFIER_NULL.
+///
+/// @ingroup FSR3
+FFX_API bool ffxFsr3ResourceIsNull(FfxResource resource);
+
+/// Queries the effect version number.
+///
+/// @returns
+/// The SDK version the effect was built with.
+///
+/// @ingroup FSR3
+FFX_API FfxVersionNumber ffxFsr3GetEffectVersion();
+
+#if defined(__cplusplus)
+}
+#endif // #if defined(__cplusplus)
diff --git a/thirdparty/amd-ffx/ffx_fsr3_private.h b/thirdparty/amd-ffx/ffx_fsr3_private.h
new file mode 100644
index 000000000000..a772daa2106c
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_fsr3_private.h
@@ -0,0 +1,62 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+#include "gpu/fsr3/ffx_fsr3_resources.h"
+#include "ffx_fsr3upscaler.h"
+#include "ffx_frameinterpolation.h"
+#include "ffx_opticalflow.h"
+#include "ffx_fsr3.h"
+
+// max queued frames for descriptor management
+#define FSR3_MAX_QUEUED_FRAMES 2
+
+// FfxFsr3Context_Private
+// The private implementation of the FSR3 context.
+// Actually this is only a container for Upscaler+Frameinterpolation+OpticalFlow
+typedef struct FfxFsr3Context_Private {
+ FfxFsr3ContextDescription description; ///< The context description held by value; its flags are read when the context is destroyed.
+ FfxInterface backendInterfaceSharedResources; ///< Backend interface whose fpDestroyBackendContext is called with effectContextIdSharedResources on destruction.
+ FfxInterface backendInterfaceUpscaling; ///< Backend interface presumably used by the upscaler -- confirm.
+ FfxInterface backendInterfaceFrameInterpolation; ///< Backend interface presumably used by frame interpolation -- confirm.
+ FfxFsr3UpscalerContext upscalerContext; ///< The upscaler sub-context; destroyed unless FFX_FSR3_ENABLE_INTERPOLATION_ONLY is set.
+ FfxOpticalflowContext ofContext; ///< The optical flow sub-context; destroyed unless FFX_FSR3_ENABLE_UPSCALING_ONLY is set.
+ FfxFrameInterpolationContext fiContext; ///< The frame interpolation sub-context; destroyed unless FFX_FSR3_ENABLE_UPSCALING_ONLY is set.
+ FfxResourceInternal sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_COUNT]; ///< Internal resources, one slot per FSR3 resource identifier.
+ FfxUInt32 effectContextIdSharedResources; ///< Effect context id passed to the shared-resources backend when destroying its backend context.
+ FfxUInt32 effectContextIdFrameGeneration; ///< NOTE(review): presumably the effect context id used for frame generation -- confirm.
+ float deltaTime; ///< NOTE(review): presumably the most recent frame time delta -- confirm units against the dispatch code.
+ bool upscalingOnly; ///< NOTE(review): presumably mirrors FFX_FSR3_ENABLE_UPSCALING_ONLY -- confirm.
+ bool interpolationOnly; ///< NOTE(review): presumably mirrors FFX_FSR3_ENABLE_INTERPOLATION_ONLY -- confirm.
+ bool asyncWorkloadSupported; ///< NOTE(review): presumably mirrors FFX_FSR3_ENABLE_ASYNC_WORKLOAD_SUPPORT -- confirm.
+ FfxUInt32 sharedResourceCount; ///< NOTE(review): presumably the number of entries of sharedResources in use -- confirm.
+ FfxDimensions2D renderSize; ///< The dimensions used to render game content, dilatedDepth, dilatedMotionVectors are expected to be of this size.
+
+ FfxResource HUDLess_color; ///< NOTE(review): presumably the color buffer without HUD elements -- confirm.
+ FfxResource dilatedDepth; ///< Dilated depth resource; expected to be of renderSize.
+ FfxResource dilatedMotionVectors; ///< Dilated motion vectors resource; expected to be of renderSize.
+ FfxResource reconstructedPrevNearestDepth; ///< NOTE(review): presumably the reconstructed previous-frame nearest depth -- confirm.
+
+ bool frameGenerationEnabled; ///< NOTE(review): presumably whether frame generation is currently enabled -- confirm.
+ int32_t frameGenerationFlags; ///< NOTE(review): presumably a combination of FfxFsr3FrameGenerationFlags -- confirm.
+ FfxFrameInterpolationPrepareDescription fgPrepareDescriptions[FSR3_MAX_QUEUED_FRAMES]; ///< One prepare description per queued frame (FSR3_MAX_QUEUED_FRAMES entries).
+} FfxFsr3Context_Private;
diff --git a/thirdparty/amd-ffx/ffx_fsr3upscaler.cpp b/thirdparty/amd-ffx/ffx_fsr3upscaler.cpp
new file mode 100644
index 000000000000..1e16eaa772ab
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_fsr3upscaler.cpp
@@ -0,0 +1,1517 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <algorithm> // for max used inside SPD CPU code.
+#include <cmath> // for fabs, fabsf, abs, sinf, sqrt, etc.
+#include <string.h> // for memset
+#include <cfloat> // for FLT_EPSILON
+#include "ffx_fsr3upscaler.h"
+
+#define FFX_CPU
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-function"
+#pragma clang diagnostic ignored "-Wsign-compare"
+#endif
+
+#include "gpu/ffx_core.h"
+#include "gpu/fsr1/ffx_fsr1.h"
+#include "gpu/spd/ffx_spd.h"
+#include "gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h"
+#include "gpu/fsr3upscaler/ffx_fsr3upscaler_resources.h"
+#include "gpu/fsr3upscaler/ffx_fsr3upscaler_common.h"
+#include "ffx_object_management.h"
+
+// max queued frames for descriptor management
+static const uint32_t FSR3UPSCALER_MAX_QUEUED_FRAMES = 16;
+
+#include "ffx_fsr3upscaler_private.h"
+
+// One entry of the lists that map a shader resource bindpoint name to a resource identifier.
+typedef struct ResourceBinding
+{
+ uint32_t index; // identifier copied into a binding's resourceIdentifier when the name matches
+ wchar_t name[64]; // bindpoint name; compared with wcscmp in patchResourceBindings
+}ResourceBinding;
+
+static const ResourceBinding srvTextureBindingTable[] = // SRV texture bindpoint names ("r_" prefix) -> resource identifiers; searched linearly by patchResourceBindings.
+{
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR, L"r_input_color_jittered"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY, L"r_input_opaque_only"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS, L"r_input_motion_vectors"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_DEPTH, L"r_input_depth" },
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE, L"r_input_exposure"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO, L"r_frame_info"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK, L"r_reactive_mask"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK, L"r_transparency_and_composition_mask"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"r_reconstructed_previous_nearest_depth"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"r_dilated_motion_vectors"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"r_dilated_depth"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, L"r_internal_upscaled_color"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION, L"r_accumulation"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY, L"r_luma_history" },
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT, L"r_rcas_input"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT, L"r_lanczos_lut"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS, L"r_spd_mips"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"r_dilated_reactive_masks"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS, L"r_new_locks"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH, L"r_farthest_depth"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH_MIP1, L"r_farthest_depth_mip1"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SHADING_CHANGE, L"r_shading_change"},
+ // Luma resources (current, previous, instability).
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA, L"r_current_luma"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_LUMA, L"r_previous_luma"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY, L"r_luma_instability"},
+};
+
+static const ResourceBinding uavTextureBindingTable[] = // UAV texture bindpoint names ("rw_" prefix) -> resource identifiers; searched linearly by patchResourceBindings.
+{
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"rw_reconstructed_previous_nearest_depth"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"rw_dilated_motion_vectors"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"rw_dilated_depth"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, L"rw_internal_upscaled_color"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION, L"rw_accumulation"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY, L"rw_luma_history"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT, L"rw_upscaled_output"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"rw_dilated_reactive_masks"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO, L"rw_frame_info"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, L"rw_spd_global_atomic"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS, L"rw_new_locks"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE, L"rw_output_autoreactive"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SHADING_CHANGE, L"rw_shading_change"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH, L"rw_farthest_depth"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH_MIP1, L"rw_farthest_depth_mip1"},
+ // Luma resources (there is no writable counterpart of r_previous_luma in this table).
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA, L"rw_current_luma"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY, L"rw_luma_instability"},
+ // Individually bound SPD mip levels 0 to 5.
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0, L"rw_spd_mip0"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_1, L"rw_spd_mip1"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_2, L"rw_spd_mip2"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_3, L"rw_spd_mip3"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_4, L"rw_spd_mip4"},
+ {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_5, L"rw_spd_mip5"},
+
+
+};
+
+static const ResourceBinding constantBufferBindingTable[] = // Constant buffer bindpoint names -> constant buffer identifiers; searched linearly by patchResourceBindings.
+{
+ {FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_FSR3UPSCALER, L"cbFSR3Upscaler"},
+ {FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_SPD, L"cbSPD"},
+ {FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_RCAS, L"cbRCAS"},
+ {FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE, L"cbGenerateReactive"},
+};
+
+typedef struct Fsr3UpscalerRcasConstants {
+ // Four 32-bit configuration words; presumably the payload bound as "cbRCAS" - TODO confirm at the dispatch site.
+ uint32_t rcasConfig[4];
+} FfxRcasConstants; // NOTE(review): this alias differs from the struct tag, unlike the sibling constant structs.
+
+typedef struct Fsr3UpscalerSpdConstants {
+ // Six 32-bit words; presumably the payload bound as "cbSPD" - TODO confirm at the dispatch site. Member order defines the layout.
+ uint32_t mips;
+ uint32_t numworkGroups;
+ uint32_t workGroupOffset[2];
+ uint32_t renderSize[2];
+} Fsr3UpscalerSpdConstants;
+
+typedef struct Fsr3UpscalerGenerateReactiveConstants // presumably the payload bound as "cbGenerateReactive" - TODO confirm at the dispatch site.
+{
+ float scale;
+ float threshold;
+ float binaryValue;
+ uint32_t flags;
+
+} Fsr3UpscalerGenerateReactiveConstants;
+
+typedef struct Fsr3UpscalerGenerateReactiveConstants2 // four floats; embedded as member autogenReactive of Fsr3UpscalerSecondaryUnion.
+{
+ float autoTcThreshold;
+ float autoTcScale;
+ float autoReactiveScale;
+ float autoReactiveMax;
+
+} Fsr3UpscalerGenerateReactiveConstants2;
+
+typedef union Fsr3UpscalerSecondaryUnion {
+ // Overlay of the per-pass secondary constants; createPipelineStates sizes the second root constant buffer with sizeof of this union.
+ Fsr3UpscalerRcasConstants rcas;
+ Fsr3UpscalerSpdConstants spd;
+ Fsr3UpscalerGenerateReactiveConstants2 autogenReactive;
+} Fsr3UpscalerSecondaryUnion;
+
+// Lanczos-2 kernel sinc(value) * sinc(value / 2); yields 1 when |value| < FFX_EPSILON. fabsf is used because unqualified abs may bind to int abs(int) and truncate.
+static float lanczos2(float value)
+{
+    return fabsf(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value));
+}
+
+// Calculate halton number (radical inverse) for index and base.
+// Returns 0 for index <= 0, and for base < 2 where the digit expansion is undefined and the loop could not terminate.
+static float halton(int32_t index, int32_t base)
+{
+    if (base < 2)
+        return 0.0f;
+    float digitWeight = 1.0f, result = 0.0f;
+    // Exact integer division; the former float round trip mis-rounded indices above 2^24.
+    for (int32_t remaining = index; remaining > 0; remaining /= base) {
+        digitWeight /= (float)base;
+        result = result + digitWeight * (float)(remaining % base);
+    }
+    return result;
+}
+
+// Validates the dispatch parameters against the context description.
+// Every finding is only reported through FFX_PRINT_MESSAGE as an error or a warning; nothing is returned.
+static void fsr3upscalerDebugCheckDispatch(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerDispatchDescription* params)
+{
+    const uint32_t contextFlags = context->contextDescription.flags;
+    const auto& maxRenderSize = context->contextDescription.maxRenderSize;
+
+    // Required command list and input resources.
+    if (nullptr == params->commandList)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"commandList is null");
+    }
+
+    if (nullptr == params->color.resource)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"color resource is null");
+    }
+
+    if (nullptr == params->depth.resource)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"depth resource is null");
+    }
+
+    if (nullptr == params->motionVectors.resource)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"motionVectors resource is null");
+    }
+
+    // A caller-provided exposure resource conflicts with the auto exposure flag.
+    if (nullptr != params->exposure.resource &&
+        (contextFlags & FFX_FSR3UPSCALER_ENABLE_AUTO_EXPOSURE) == FFX_FSR3UPSCALER_ENABLE_AUTO_EXPOSURE)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"exposure resource provided, however auto exposure flag is present");
+    }
+
+    if (nullptr == params->output.resource)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"output resource is null");
+    }
+
+    // Jitter, motion vector scale and render size ranges.
+    if (1.0f < fabs(params->jitterOffset.x) || 1.0f < fabs(params->jitterOffset.y))
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"jitterOffset contains value outside of expected range [-1.0, 1.0]");
+    }
+    if ((float)maxRenderSize.width < params->motionVectorScale.x ||
+        (float)maxRenderSize.height < params->motionVectorScale.y)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"motionVectorScale contains scale value greater than maxRenderSize");
+    }
+    if (0.0f == params->motionVectorScale.x ||
+        0.0f == params->motionVectorScale.y)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"motionVectorScale contains zero scale value");
+    }
+
+    if (maxRenderSize.width < params->renderSize.width ||
+        maxRenderSize.height < params->renderSize.height)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"renderSize is greater than context maxRenderSize");
+    }
+    if (0 == params->renderSize.width ||
+        0 == params->renderSize.height)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"renderSize contains zero dimension");
+    }
+
+    // Scalar parameters.
+    if (params->sharpness < 0.0f || 1.0f < params->sharpness)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"sharpness contains value outside of expected range [0.0, 1.0]");
+    }
+    if (params->frameTimeDelta < 1.0f)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"frameTimeDelta is less than 1.0f - this value should be milliseconds (~16.6f for 60fps)");
+    }
+    if (0.0f == params->preExposure)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"preExposure provided as 0.0f which is invalid");
+    }
+
+    // Camera planes: which of near/far is expected to be the larger one depends on the depth flags of the context.
+    const bool depthIsInfinite = (contextFlags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE) == FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE;
+    const bool depthIsInverted = (contextFlags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED) == FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED;
+
+    if (depthIsInverted)
+    {
+        if (params->cameraNear < params->cameraFar)
+        {
+            FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
+                L"FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED flag is present yet cameraNear is less than cameraFar");
+        }
+        if (depthIsInfinite && params->cameraNear != FLT_MAX)
+        {
+            FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
+                L"FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE and FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED present, yet cameraNear != FLT_MAX");
+        }
+        if (params->cameraFar < 0.075f)
+        {
+            FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
+                L"FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED present, cameraFar value is very low which may result in depth separation artefacting");
+        }
+    }
+    else
+    {
+        if (params->cameraFar < params->cameraNear)
+        {
+            FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
+                L"cameraNear is greater than cameraFar in non-inverted-depth context");
+        }
+        if (depthIsInfinite && params->cameraFar != FLT_MAX)
+        {
+            FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
+                L"FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE present, yet cameraFar != FLT_MAX");
+        }
+        if (params->cameraNear < 0.075f)
+        {
+            FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING,
+                L"cameraNear value is very low which may result in depth separation artefacting");
+        }
+    }
+
+    // The vertical field of view is expected in (0, FFX_PI].
+    if (params->cameraFovAngleVertical <= 0.0f)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is 0.0f - this value should be > 0.0f");
+    }
+    if (FFX_PI < params->cameraFovAngleVertical)
+    {
+        FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is greater than 180 degrees/PI");
+    }
+}
+
+// Searches table (tableCount entries) for an entry whose name equals bindingName.
+// On a match the entry's index is stored in *outIdentifier and true is returned; otherwise *outIdentifier is untouched.
+static bool fsr3upscalerLookupBindingIdentifier(const ResourceBinding* table, size_t tableCount, const wchar_t* bindingName, uint32_t* outIdentifier)
+{
+    // size_t index: the table sizes come from _countof, so no signed/unsigned comparison takes place.
+    for (size_t mapIndex = 0; mapIndex < tableCount; ++mapIndex)
+    {
+        if (0 == wcscmp(table[mapIndex].name, bindingName))
+        {
+            *outIdentifier = table[mapIndex].index;
+            return true;
+        }
+    }
+    return false;
+}
+
+// Re-routes every SRV texture, UAV texture and constant buffer binding of inoutPipeline to the resource identifier
+// registered for its bindpoint name. Returns FFX_ERROR_INVALID_ARGUMENT at the first name found in no table, else FFX_OK.
+static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
+{
+    for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvTextureCount; ++srvIndex)
+    {
+        uint32_t identifier = 0;
+        if (!fsr3upscalerLookupBindingIdentifier(srvTextureBindingTable, _countof(srvTextureBindingTable), inoutPipeline->srvTextureBindings[srvIndex].name, &identifier))
+            return FFX_ERROR_INVALID_ARGUMENT;
+        inoutPipeline->srvTextureBindings[srvIndex].resourceIdentifier = identifier;
+    }
+
+    for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavTextureCount; ++uavIndex)
+    {
+        uint32_t identifier = 0;
+        if (!fsr3upscalerLookupBindingIdentifier(uavTextureBindingTable, _countof(uavTextureBindingTable), inoutPipeline->uavTextureBindings[uavIndex].name, &identifier))
+            return FFX_ERROR_INVALID_ARGUMENT;
+        inoutPipeline->uavTextureBindings[uavIndex].resourceIdentifier = identifier;
+    }
+
+    for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex)
+    {
+        uint32_t identifier = 0;
+        if (!fsr3upscalerLookupBindingIdentifier(constantBufferBindingTable, _countof(constantBufferBindingTable), inoutPipeline->constantBufferBindings[cbIndex].name, &identifier))
+            return FFX_ERROR_INVALID_ARGUMENT;
+        inoutPipeline->constantBufferBindings[cbIndex].resourceIdentifier = identifier;
+    }
+
+    return FFX_OK;
+}
+
+// Works out the shader permutation bitmask to load for passId from the context flags and the device-derived options.
+static uint32_t getPipelinePermutationFlags(uint32_t contextFlags, FfxFsr3UpscalerPass passId, bool fp16, bool force64, bool useLut)
+{
+    uint32_t permutation = 0;
+    if (contextFlags & FFX_FSR3UPSCALER_ENABLE_HIGH_DYNAMIC_RANGE) permutation |= FSR3UPSCALER_SHADER_PERMUTATION_HDR_COLOR_INPUT;
+    if (!(contextFlags & FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS)) permutation |= FSR3UPSCALER_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS;
+    if (contextFlags & FFX_FSR3UPSCALER_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) permutation |= FSR3UPSCALER_SHADER_PERMUTATION_JITTER_MOTION_VECTORS;
+    if (contextFlags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED) permutation |= FSR3UPSCALER_SHADER_PERMUTATION_DEPTH_INVERTED;
+    if (FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN == passId) permutation |= FSR3UPSCALER_SHADER_PERMUTATION_ENABLE_SHARPENING;
+    if (useLut) permutation |= FSR3UPSCALER_SHADER_PERMUTATION_USE_LANCZOS_TYPE;
+    if (force64) permutation |= FSR3UPSCALER_SHADER_PERMUTATION_FORCE_WAVE64;
+#if defined(_GAMING_XBOX)
+    /** On Xbox we enable 16-bit math, and use 32-bit within the shader only where it's necessary. */
+    const bool allowFp16 = fp16;
+#else
+    const bool allowFp16 = fp16 && (FFX_FSR3UPSCALER_PASS_RCAS != passId);
+#endif // defined(_GAMING_XBOX)
+    return allowFp16 ? (permutation | FSR3UPSCALER_SHADER_PERMUTATION_ALLOW_FP16) : permutation;
+}
+
+// Creates the compute pipelines of the FSR3 upscaler passes and resolves their resource bindings by name.
+// Permutation flags are derived from the context flags and the queried device capabilities
+// (FP16 support, wave lane counts, shader model).
+// Returns FFX_OK on success; failures are handled by FFX_VALIDATE (presumably an early return of the error code).
+static FfxErrorCode createPipelineStates(FfxFsr3UpscalerContext_Private* context)
+{
+    FFX_ASSERT(context);
+
+    FfxPipelineDescription pipelineDescription = {};
+    pipelineDescription.contextFlags = context->contextDescription.flags;
+    pipelineDescription.stage = FFX_BIND_COMPUTE_SHADER_STAGE;
+
+    // Samplers: one point and one linear sampler, clamped on all three axes.
+    // pipelineDescription only stores pointers to the local sampler / root constant arrays declared below.
+    pipelineDescription.samplerCount = 2;
+    FfxSamplerDescription samplerDescs[2] = { { FFX_FILTER_TYPE_MINMAGMIP_POINT, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE },
+        { FFX_FILTER_TYPE_MINMAGMIP_LINEAR, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE} };
+    pipelineDescription.samplers = samplerDescs;
+
+    // Root constants
+    pipelineDescription.rootConstantBufferCount = 2;
+    FfxRootConstantDescription rootConstantDescs[2] = { {sizeof(Fsr3UpscalerConstants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE },
+        { sizeof(Fsr3UpscalerSecondaryUnion) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE } };
+    pipelineDescription.rootConstants = rootConstantDescs;
+
+    // Query device capabilities. The struct is zero-initialized and the query result is checked so that a failing
+    // backend can never leave the permutation decisions below reading indeterminate values.
+    FfxDeviceCapabilities capabilities = {};
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &capabilities));
+
+    // Setup a few options used to determine permutation flags
+    const bool haveShaderModel66 = capabilities.maximumSupportedShaderModel >= FFX_SHADER_MODEL_6_6;
+    const bool supportedFP16 = capabilities.fp16Supported;
+    // The Lanczos LUT is used only when the device reports wave lane counts of min 32 / max 64;
+    // on such devices wave64 may additionally be forced when shader model 6.6 is available.
+    const bool mixedWaveSizes = (capabilities.waveLaneCountMin == 32) && (capabilities.waveLaneCountMax == 64);
+    const bool useLut = mixedWaveSizes;
+    const bool canForceWave64 = mixedWaveSizes && haveShaderModel66;
+
+    // Work out what permutation to load.
+    const uint32_t contextFlags = context->contextDescription.flags;
+
+    // Set up pipeline descriptor (basically RootSignature and binding)
+    wcscpy_s(pipelineDescription.name, L"FSR3-LUMA-PYRAMID");
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_LUMA_PYRAMID,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_LUMA_PYRAMID, supportedFP16, canForceWave64, useLut),
+        &pipelineDescription, context->effectContextId, &context->pipelineLumaPyramid));
+    wcscpy_s(pipelineDescription.name, L"FSR3-RCAS");
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_RCAS,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_RCAS, supportedFP16, canForceWave64, useLut),
+        &pipelineDescription, context->effectContextId, &context->pipelineRCAS));
+    wcscpy_s(pipelineDescription.name, L"FSR3-GEN_REACTIVE");
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_GENERATE_REACTIVE,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_GENERATE_REACTIVE, supportedFP16, canForceWave64, useLut),
+        &pipelineDescription, context->effectContextId, &context->pipelineGenerateReactive));
+
+    // The remaining passes are created with a single root constant buffer.
+    pipelineDescription.rootConstantBufferCount = 1;
+
+    wcscpy_s(pipelineDescription.name, L"FSR3-PREPARE-INPUTS");
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_PREPARE_INPUTS,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_PREPARE_INPUTS, supportedFP16, canForceWave64, useLut),
+        &pipelineDescription, context->effectContextId, &context->pipelinePrepareInputs));
+
+    wcscpy_s(pipelineDescription.name, L"FSR3-PREPARE-REACTIVITY");
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_PREPARE_REACTIVITY,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_PREPARE_REACTIVITY, supportedFP16, canForceWave64, useLut),
+        &pipelineDescription, context->effectContextId, &context->pipelinePrepareReactivity));
+
+    wcscpy_s(pipelineDescription.name, L"FSR3-SHADING-CHANGE");
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_SHADING_CHANGE,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_SHADING_CHANGE, supportedFP16, canForceWave64, useLut),
+        &pipelineDescription, context->effectContextId, &context->pipelineShadingChange));
+
+    wcscpy_s(pipelineDescription.name, L"FSR3-ACCUMULATE");
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_ACCUMULATE,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_ACCUMULATE, supportedFP16, canForceWave64, useLut),
+        &pipelineDescription, context->effectContextId, &context->pipelineAccumulate));
+
+    wcscpy_s(pipelineDescription.name, L"FSR3-ACCUM_SHARP");
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN, supportedFP16, canForceWave64, useLut),
+        &pipelineDescription, context->effectContextId, &context->pipelineAccumulateSharpen));
+
+    wcscpy_s(pipelineDescription.name, L"FSR3-SHADING-CHANGE-PYRAMID");
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_SHADING_CHANGE_PYRAMID,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_SHADING_CHANGE_PYRAMID, supportedFP16, canForceWave64, useLut),
+        &pipelineDescription, context->effectContextId, &context->pipelineShadingChangePyramid));
+
+    wcscpy_s(pipelineDescription.name, L"FSR3-LUMA-INSTABILITY");
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_LUMA_INSTABILITY,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_LUMA_INSTABILITY, supportedFP16, canForceWave64, useLut),
+        &pipelineDescription, context->effectContextId, &context->pipelineLumaInstability));
+
+    wcscpy_s(pipelineDescription.name, L"FSR3-DEBUG-VIEW");
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_DEBUG_VIEW,
+        getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_DEBUG_VIEW, supportedFP16, canForceWave64, useLut),
+        &pipelineDescription, context->effectContextId, &context->pipelineDebugView));
+
+    // for each pipeline: re-route/fix-up IDs based on names
+    FFX_VALIDATE(patchResourceBindings(&context->pipelinePrepareInputs));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelinePrepareReactivity));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineShadingChange));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineAccumulate));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineLumaPyramid));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineAccumulateSharpen));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineRCAS));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineGenerateReactive));
+    // NOTE(review): no pipeline is created for pipelineTcrAutogenerate in this function; patching it appears to
+    // rely on its binding counts being zero - confirm, or drop the call together with the member.
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineTcrAutogenerate));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineShadingChangePyramid));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineLumaInstability));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineDebugView));
+
+    return FFX_OK;
+}
+
+static FfxErrorCode generateReactiveMaskInternal(FfxFsr3UpscalerContext_Private* contextPrivate, const FfxFsr3UpscalerDispatchDescription* params);
+
+static FfxErrorCode fsr3upscalerCreate(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerContextDescription* contextDescription)
+{
+ FFX_ASSERT(context);
+ FFX_ASSERT(contextDescription);
+
+ // Setup the data for implementation.
+ memset(context, 0, sizeof(FfxFsr3UpscalerContext_Private));
+ context->device = contextDescription->backendInterface.device;
+
+ memcpy(&context->contextDescription, contextDescription, sizeof(FfxFsr3UpscalerContextDescription));
+
+ // Check version info - make sure we are linked with the right backend version
+ FfxVersionNumber version = context->contextDescription.backendInterface.fpGetSDKVersion(&context->contextDescription.backendInterface);
+ FFX_RETURN_ON_ERROR(version == FFX_SDK_MAKE_VERSION(1, 1, 4), FFX_ERROR_INVALID_VERSION);
+
+ // Create the context.
+ FfxErrorCode errorCode = context->contextDescription.backendInterface.fpCreateBackendContext(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, nullptr, &context->effectContextId);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+ // call out for device caps.
+ errorCode = context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &context->deviceCapabilities);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+ // set defaults
+ context->firstExecution = true;
+ context->resourceFrameIndex = 0;
+
+ context->constants.maxUpscaleSize[0] = contextDescription->maxUpscaleSize.width;
+ context->constants.maxUpscaleSize[1] = contextDescription->maxUpscaleSize.height;
+ context->constants.velocityFactor = 1.0f;
+ context->constants.reactivenessScale = 1.0f;
+ context->constants.shadingChangeScale = 1.0f;
+ context->constants.accumulationAddedPerFrame = 1.0f/3.0f;
+ context->constants.minDisocclusionAccumulation = -1.0f/3.0f;
+
+ // generate the data for the LUT.
+ const uint32_t lanczos2LutWidth = 128;
+ int16_t lanczos2Weights[lanczos2LutWidth] = { };
+
+ for (uint32_t currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; currentLanczosWidthIndex++) {
+
+ const float x = 2.0f * currentLanczosWidthIndex / float(lanczos2LutWidth - 1);
+ const float y = lanczos2(x);
+ lanczos2Weights[currentLanczosWidthIndex] = int16_t(roundf(y * 32767.0f));
+ }
+
+ uint8_t defaultReactiveMaskData = 0U;
+ uint32_t atomicInitData = 0U;
+ float defaultExposure[] = { 0.0f, 0.0f };
+
+ const FfxDimensions2D maxRenderSizeDiv2 = { contextDescription->maxRenderSize.width / 2, contextDescription->maxRenderSize.height / 2 };
+
+ // declare internal resources needed
+ const FfxInternalResourceDescription internalSurfaceDesc[] = {
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_1, L"FSR3UPSCALER_Accumulation1", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_2, L"FSR3UPSCALER_Accumulation2", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_1, L"FSR3UPSCALER_Luma1", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_2, L"FSR3UPSCALER_Luma2", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1, L"FSR3UPSCALER_IntermediateFp16x1", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET),
+ FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SHADING_CHANGE, L"FSR3UPSCALER_ShadingChange", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET),
+ FFX_SURFACE_FORMAT_R8_UNORM, maxRenderSizeDiv2.width, maxRenderSizeDiv2.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS, L"FSR3UPSCALER_NewLocks", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxUpscaleSize.width, contextDescription->maxUpscaleSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1, L"FSR3UPSCALER_InternalUpscaled1", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET),
+ FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxUpscaleSize.width, contextDescription->maxUpscaleSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2, L"FSR3UPSCALER_InternalUpscaled2", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET),
+ FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxUpscaleSize.width, contextDescription->maxUpscaleSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS, L"FSR3UPSCALER_SpdMips", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_FLOAT, maxRenderSizeDiv2.width, maxRenderSizeDiv2.height, 0, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH_MIP1, L"FSR3UPSCALER_FarthestDepthMip1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16_FLOAT, maxRenderSizeDiv2.width, maxRenderSizeDiv2.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1, L"FSR3UPSCALER_LumaHistory1", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2, L"FSR3UPSCALER_LumaHistory2", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, L"FSR3UPSCALER_SpdAtomicCounter", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
+ FFX_SURFACE_FORMAT_R32_UINT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, FfxResourceInitData::FfxResourceInitValue(sizeof(atomicInitData), 0) },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"FSR3UPSCALER_DilatedReactiveMasks", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET),
+ FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT, L"FSR3UPSCALER_LanczosLutData", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_READ_ONLY,
+ FFX_SURFACE_FORMAT_R16_SNORM, lanczos2LutWidth, 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_BUFFER, sizeof(lanczos2Weights), lanczos2Weights} },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY, L"FSR3UPSCALER_DefaultReactivityMask", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_READ_ONLY,
+ FFX_SURFACE_FORMAT_R8_UNORM, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, FfxResourceInitData::FfxResourceInitValue(sizeof(defaultReactiveMaskData), defaultReactiveMaskData) },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE, L"FSR3UPSCALER_DefaultExposure", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_READ_ONLY,
+ FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, FfxResourceInitData::FfxResourceInitBuffer(sizeof(defaultExposure), defaultExposure) },
+
+ { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO, L"FSR3UPSCALER_FrameInfo", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ };
+
+ // clear the SRV resources to NULL.
+ memset(context->srvResources, 0, sizeof(context->srvResources));
+
+ for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) {
+
+ const FfxInternalResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex];
+ const FfxResourceType resourceType = internalSurfaceDesc[currentSurfaceIndex].type;
+ const FfxResourceDescription resourceDescription = {resourceType,
+ currentSurfaceDescription->format,
+ currentSurfaceDescription->width,
+ currentSurfaceDescription->height,
+ 1,
+ currentSurfaceDescription->mipCount,
+ currentSurfaceDescription->flags,
+ currentSurfaceDescription->usage};
+ const FfxResourceStates initialState = (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_READ_ONLY) ? FFX_RESOURCE_STATE_COMPUTE_READ : FFX_RESOURCE_STATE_UNORDERED_ACCESS;
+ const FfxCreateResourceDescription createResourceDescription = { FFX_HEAP_TYPE_DEFAULT, resourceDescription, initialState, currentSurfaceDescription->name, currentSurfaceDescription->id, currentSurfaceDescription->initData };
+
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreateResource(&context->contextDescription.backendInterface, &createResourceDescription, context->effectContextId, &context->srvResources[currentSurfaceDescription->id]));
+ }
+
+ // copy resources to uavResources list
+ memcpy(context->uavResources, context->srvResources, sizeof(context->srvResources));
+
+ // avoid compiling pipelines on first render
+ {
+ errorCode = createPipelineStates(context);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+ }
+
+ return FFX_OK;
+}
+
+static FfxErrorCode fsr3upscalerRelease(FfxFsr3UpscalerContext_Private* context)
+{ // Tears down an FSR3 upscaler context: releases its pipelines, resets the external and alias resource slots to the NULL identifier, releases the remaining resources and destroys the backend context. Always returns FFX_OK.
+ FFX_ASSERT(context);
+
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelinePrepareInputs, context->effectContextId); // one release call per pipeline state stored in the context
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelinePrepareReactivity, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineShadingChange, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineAccumulate, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineAccumulateSharpen, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineRCAS, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineLumaPyramid, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineGenerateReactive, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineTcrAutogenerate, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineShadingChangePyramid, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineLumaInstability, context->effectContextId);
+ ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineDebugView, context->effectContextId);
+
+ // Unregister external resources: reset each slot to the NULL identifier before the release loop below (presumably so ffxSafeReleaseResource skips resources this context did not create)
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_DEPTH] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+
+ // Unregister references: mostly slots that fsr3upscalerDispatch re-points at other srvResources entries each frame. NOTE(review): dispatch also assigns the LUMA_HISTORY slot, which is not reset here - confirm that is intended.
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_LUMA] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL };
+
+ // Release the copy resources for those that had init data (SPD atomic counter, Lanczos LUT, default reactivity, default exposure)
+ ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT], context->effectContextId);
+ ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT], context->effectContextId);
+ ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY], context->effectContextId);
+ ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE], context->effectContextId);
+
+ // release internal resources: every srvResources slot is passed to ffxSafeReleaseResource, including the ones reset to NULL above
+ for (int32_t currentResourceIndex = 0; currentResourceIndex < FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT; ++currentResourceIndex) {
+
+ ffxSafeReleaseResource(&context->contextDescription.backendInterface, context->srvResources[currentResourceIndex], context->effectContextId);
+ }
+
+ // Destroy the backend context identified by effectContextId
+ context->contextDescription.backendInterface.fpDestroyBackendContext(&context->contextDescription.backendInterface, context->effectContextId);
+
+ return FFX_OK;
+}
+
+static void setupDeviceDepthToViewSpaceDepthParams(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerDispatchDescription* params)
+{ // Computes the four deviceToViewDepth constants: [0] and [1] from the near/far planes and the depth-inverted/depth-infinite context flags, [2] and [3] from the vertical FOV and the render-size aspect ratio.
+ const bool bInverted = (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED) == FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED;
+ const bool bInfinite = (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE) == FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE;
+
+ // make sure it has no impact if near and far plane values are swapped in dispatch params
+ // the flags "inverted" and "infinite" will decide what transform to use
+ float fMin = FFX_MINIMUM(params->cameraNear, params->cameraFar);
+ float fMax = FFX_MAXIMUM(params->cameraNear, params->cameraFar);
+
+ if (bInverted) { // inverted depth: swap the two plane values
+ float tmp = fMin;
+ fMin = fMax;
+ fMax = tmp;
+ }
+
+ // a 0 0 0 x
+ // 0 b 0 0 y
+ // 0 0 c d z
+ // 0 0 e 0 1
+
+ const float fQ = fMax / (fMin - fMax); // non-finite when near == far (zero denominator)
+ const float d = -1.0f; // for clarity
+
+ const float matrix_elem_c[2][2] = { // indexed [bInverted][bInfinite]
+ fQ, // non reversed, non infinite
+ -1.0f - FLT_EPSILON, // non reversed, infinite
+ fQ, // reversed, non infinite
+ 0.0f + FLT_EPSILON // reversed, infinite
+ };
+
+ const float matrix_elem_e[2][2] = { // indexed [bInverted][bInfinite]
+ fQ * fMin, // non reversed, non infinite
+ -fMin - FLT_EPSILON, // non reversed, infinite
+ fQ * fMin, // reversed, non infinite
+ fMax, // reversed, infinite
+ };
+
+ context->constants.deviceToViewDepth[0] = d * matrix_elem_c[bInverted][bInfinite];
+ context->constants.deviceToViewDepth[1] = matrix_elem_e[bInverted][bInfinite];
+
+ // revert x and y coords: store 1/a and 1/b, with b = cot(fovY / 2) and a = b / aspect
+ const float aspect = params->renderSize.width / float(params->renderSize.height); // NOTE(review): a zero renderSize.height makes aspect non-finite - confirm callers never pass 0
+ const float cotHalfFovY = cosf(0.5f * params->cameraFovAngleVertical) / sinf(0.5f * params->cameraFovAngleVertical);
+ const float a = cotHalfFovY / aspect;
+ const float b = cotHalfFovY;
+
+ context->constants.deviceToViewDepth[2] = (1.0f / a);
+ context->constants.deviceToViewDepth[3] = (1.0f / b);
+}
+
+static void scheduleDispatch(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerDispatchDescription*, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
+{ // Packs the pipeline's SRV textures, UAV textures and constant buffers into a compute job with dimensions {dispatchX, dispatchY, 1} and submits it through fpScheduleGpuJob. The dispatch-description parameter is unnamed and unused.
+ FfxComputeJobDescription jobDescriptor = {};
+
+ for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) { // SRVs: look up each binding's resource in srvResources
+
+ const uint32_t currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier;
+ const FfxResourceInternal currentResource = context->srvResources[currentResourceId];
+ jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource;
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name); // binding names are only copied when FFX_DEBUG is defined
+#endif
+ }
+
+ for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavTextureCount; ++currentUnorderedAccessViewIndex) { // UAVs: same lookup, but in uavResources
+
+ const uint32_t currentResourceId = pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].resourceIdentifier;
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].name, pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].name);
+#endif
+
+ if (currentResourceId >= FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0 && currentResourceId <= FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_5)
+ { // SPD mip-level identifiers share the single SPD_MIPS resource; the offset from SPD_MIPS_LEVEL_0 selects the mip
+ const FfxResourceInternal currentResource = context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS];
+ jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource;
+ jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = currentResourceId - FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0;
+ }
+ else
+ { // every other identifier binds its own uavResources entry at mip 0
+ const FfxResourceInternal currentResource = context->uavResources[currentResourceId];
+ jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource;
+ jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = 0;
+ }
+ }
+
+ jobDescriptor.dimensions[0] = dispatchX;
+ jobDescriptor.dimensions[1] = dispatchY;
+ jobDescriptor.dimensions[2] = 1;
+ jobDescriptor.pipeline = *pipeline; // the pipeline state is copied by value into the job
+
+ for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) { // constant buffers: indexed by each binding's resource identifier
+#ifdef FFX_DEBUG
+ wcscpy_s( jobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name);
+#endif
+ jobDescriptor.cbs[currentRootConstantIndex] = context->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier];
+ }
+
+ FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE };
+ wcscpy_s(dispatchJob.jobLabel, pipeline->name); // the job label is set unconditionally, unlike the FFX_DEBUG-only names above
+ dispatchJob.computeJobDescriptor = jobDescriptor;
+
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &dispatchJob);
+}
+
+FFX_API FfxErrorCode ffxFsr3UpscalerGetSharedResourceDescriptions(FfxFsr3UpscalerContext* context, FfxFsr3UpscalerSharedResourceDescriptions* SharedResources)
+{ // Writes creation descriptions for the three shared textures (dilated depth, dilated motion vectors, reconstructed previous nearest depth) into SharedResources, each sized to the context's maxRenderSize. Returns FFX_OK once both pointer checks pass.
+ FFX_RETURN_ON_ERROR( // presumably returns FFX_ERROR_INVALID_POINTER to the caller when context is null
+ context,
+ FFX_ERROR_INVALID_POINTER);
+ FFX_RETURN_ON_ERROR( // same check for the output structure
+ SharedResources,
+ FFX_ERROR_INVALID_POINTER);
+
+ FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context); // the public context is cast to the private struct to reach contextDescription
+ SharedResources->dilatedDepth = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R32_FLOAT, contextPrivate->contextDescription.maxRenderSize.width, contextPrivate->contextDescription.maxRenderSize.height, 1, 1, FFX_RESOURCE_FLAGS_NONE, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET) },
+ FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FSR3UPSCALER_DilatedDepth", FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }; // R32_FLOAT, initial state UNORDERED_ACCESS, no init data
+ SharedResources->dilatedMotionVectors = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R16G16_FLOAT, contextPrivate->contextDescription.maxRenderSize.width, contextPrivate->contextDescription.maxRenderSize.height, 1, 1, FFX_RESOURCE_FLAGS_NONE, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET) },
+ FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FSR3UPSCALER_DilatedVelocity", FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }; // R16G16_FLOAT, initial state UNORDERED_ACCESS, no init data
+ SharedResources->reconstructedPrevNearestDepth = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R32_UINT, contextPrivate->contextDescription.maxRenderSize.width, contextPrivate->contextDescription.maxRenderSize.height, 1, 1, FFX_RESOURCE_FLAGS_NONE, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV) },
+ FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FSR3UPSCALER_ReconstructedPrevNearestDepth", FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }; // R32_UINT, UAV usage only, initial state UNORDERED_ACCESS, no init data
+
+ return FFX_OK;
+}
+
+static FfxErrorCode fsr3upscalerDispatch(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerDispatchDescription* params)
+{
+
+ if ((context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DEBUG_CHECKING) == FFX_FSR3UPSCALER_ENABLE_DEBUG_CHECKING)
+ {
+ fsr3upscalerDebugCheckDispatch(context, params);
+ }
+
+ // take a short cut to the command list
+ FfxCommandList commandList = params->commandList;
+
+ if (context->firstExecution)
+ {
+ FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT };
+
+ const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f };
+ memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float));
+
+ wcscpy_s(clearJob.jobLabel, L"Clear Accumulation 1");
+ clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_1];
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
+ wcscpy_s(clearJob.jobLabel, L"Clear Accumulation 2");
+ clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_2];
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
+
+ wcscpy_s(clearJob.jobLabel, L"Clear Temporal Luma 1");
+ clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_1];
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
+ wcscpy_s(clearJob.jobLabel, L"Clear Temporal Luma 2");
+ clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_2];
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
+ }
+
+ // Prepare per frame descriptor tables
+ const bool isOddFrame = !!(context->resourceFrameIndex & 1);
+ const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT : 0;
+ const uint32_t currentGpuTableBase = 2 * FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex;
+ const uint32_t accumulationSrvResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_2 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_1;
+ const uint32_t accumulationUavResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_1 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_2;
+ const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1;
+ const uint32_t upscaledColorUavResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2;
+ const uint32_t lumaHistorySrvResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1;
+ const uint32_t lumaHistoryUavResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2;
+ const uint32_t currentLumaSrvResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_2 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_1;
+ const uint32_t currentLumaUavResourceIndex = currentLumaSrvResourceIndex;
+ const uint32_t previousLumaSrvResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_1 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_2;
+
+ const bool resetAccumulation = params->reset || context->firstExecution;
+ context->firstExecution = false;
+
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->color, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->depth, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_DEPTH]);
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->motionVectors, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS]);
+
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->dilatedMotionVectors, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS]);
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS];
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->dilatedDepth, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH]);
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH];
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->reconstructedPrevNearestDepth, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH]);
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH];
+
+ // if auto exposure is enabled use the auto exposure SRV, otherwise what the app sends.
+ if (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_AUTO_EXPOSURE) {
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO];
+ } else {
+ if (ffxFsr3UpscalerResourceIsNull(params->exposure)) {
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE];
+ } else {
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->exposure, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE]);
+ }
+ }
+
+ if (ffxFsr3UpscalerResourceIsNull(params->reactive)) {
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY];
+ }
+ else {
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->reactive, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]);
+ }
+
+ if (ffxFsr3UpscalerResourceIsNull(params->transparencyAndComposition)) {
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY];
+ } else {
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->transparencyAndComposition, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK]);
+ }
+
+ context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->output, context->effectContextId, &context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT]);
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION] = context->srvResources[accumulationSrvResourceIndex];
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = context->srvResources[upscaledColorSrvResourceIndex];
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION] = context->uavResources[accumulationUavResourceIndex];
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = context->uavResources[upscaledColorUavResourceIndex];
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT] = context->uavResources[upscaledColorUavResourceIndex];
+
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA] = context->srvResources[currentLumaSrvResourceIndex];
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA] = context->uavResources[currentLumaUavResourceIndex];
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_LUMA] = context->srvResources[previousLumaSrvResourceIndex];
+
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY] = context->uavResources[lumaHistoryUavResourceIndex];
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY] = context->srvResources[lumaHistorySrvResourceIndex];
+
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1];
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH] = context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1];
+
+ context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1];
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY] = context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1];
+
+ // actual resource size may differ from render/display resolution (e.g. due to Hw/API restrictions), so query the descriptor for UVs adjustment
+ const FfxResourceDescription resourceDescInputColor = context->contextDescription.backendInterface.fpGetResourceDescription(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+ const FfxResourceDescription resourceDescReactiveMask = context->contextDescription.backendInterface.fpGetResourceDescription(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]);
+ FFX_ASSERT(resourceDescInputColor.type == FFX_RESOURCE_TYPE_TEXTURE2D);
+
+ context->constants.previousFrameJitterOffset[0] = context->constants.jitterOffset[0];
+ context->constants.previousFrameJitterOffset[1] = context->constants.jitterOffset[1];
+ context->constants.jitterOffset[0] = params->jitterOffset.x;
+ context->constants.jitterOffset[1] = params->jitterOffset.y;
+
+ context->constants.previousFrameRenderSize[0] = context->constants.renderSize[0];
+ context->constants.previousFrameRenderSize[1] = context->constants.renderSize[1];
+ context->constants.renderSize[0] = int32_t(params->renderSize.width ? params->renderSize.width : resourceDescInputColor.width);
+ context->constants.renderSize[1] = int32_t(params->renderSize.height ? params->renderSize.height : resourceDescInputColor.height);
+ context->constants.maxRenderSize[0] = int32_t(context->contextDescription.maxRenderSize.width);
+ context->constants.maxRenderSize[1] = int32_t(context->contextDescription.maxRenderSize.height);
+
+ // compute the horizontal FOV for the shader from the vertical one.
+ const float aspectRatio = (float)params->renderSize.width / (float)params->renderSize.height;
+ const float cameraAngleHorizontal = atan(tan(params->cameraFovAngleVertical / 2) * aspectRatio) * 2;
+ context->constants.tanHalfFOV = tanf(cameraAngleHorizontal * 0.5f);
+ context->constants.viewSpaceToMetersFactor = (params->viewSpaceToMetersFactor > 0.0f) ? params->viewSpaceToMetersFactor : 1.0f;
+
+ // compute params to enable device depth to view space depth computation in shader
+ setupDeviceDepthToViewSpaceDepthParams(context, params);
+
+ context->constants.previousFrameUpscaleSize[0] = context->constants.upscaleSize[0];
+ context->constants.previousFrameUpscaleSize[1] = context->constants.upscaleSize[1];
+
+ if (params->upscaleSize.height == 0 && params->upscaleSize.width == 0)
+ {
+ context->constants.upscaleSize[0] = context->contextDescription.maxUpscaleSize.width;
+ context->constants.upscaleSize[1] = context->contextDescription.maxUpscaleSize.height;
+ }
+ else
+ {
+ context->constants.upscaleSize[0] = params->upscaleSize.width;
+ context->constants.upscaleSize[1] = params->upscaleSize.height;
+ }
+
+ // To be updated if resource is larger than the actual image size
+ context->constants.downscaleFactor[0] = float(context->constants.renderSize[0]) / context->constants.upscaleSize[0];
+ context->constants.downscaleFactor[1] = float(context->constants.renderSize[1]) / context->constants.upscaleSize[1];
+
+ // calculate pre-exposure relevant factors
+ context->constants.deltaPreExposure = 1.0f;
+ context->previousFramePreExposure = context->preExposure;
+ context->preExposure = (params->preExposure != 0.0f) ? params->preExposure : 1.0f;
+
+ if (context->previousFramePreExposure > 0.0f) {
+ context->constants.deltaPreExposure = context->preExposure / context->previousFramePreExposure;
+ }
+
+ // motion vector data
+ const int32_t* motionVectorsTargetSize = (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS)
+ ? context->constants.upscaleSize
+ : context->constants.renderSize;
+
+ context->constants.motionVectorScale[0] = (params->motionVectorScale.x / motionVectorsTargetSize[0]);
+ context->constants.motionVectorScale[1] = (params->motionVectorScale.y / motionVectorsTargetSize[1]);
+
+ // compute jitter cancellation
+ if (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) {
+
+ context->constants.motionVectorJitterCancellation[0] = (context->previousJitterOffset[0] - context->constants.jitterOffset[0]) / motionVectorsTargetSize[0];
+ context->constants.motionVectorJitterCancellation[1] = (context->previousJitterOffset[1] - context->constants.jitterOffset[1]) / motionVectorsTargetSize[1];
+
+ context->previousJitterOffset[0] = context->constants.jitterOffset[0];
+ context->previousJitterOffset[1] = context->constants.jitterOffset[1];
+ }
+
+ // lock data, assuming jitter sequence length computation for now
+ const int32_t jitterPhaseCount = ffxFsr3UpscalerGetJitterPhaseCount(params->renderSize.width, context->constants.upscaleSize[0]);
+
+ // init on first frame
+ if (resetAccumulation || context->constants.jitterPhaseCount == 0) {
+ context->constants.jitterPhaseCount = (float)jitterPhaseCount;
+ } else {
+ const int32_t jitterPhaseCountDelta = (int32_t)(jitterPhaseCount - context->constants.jitterPhaseCount);
+ if (jitterPhaseCountDelta > 0) {
+ context->constants.jitterPhaseCount++;
+ } else if (jitterPhaseCountDelta < 0) {
+ context->constants.jitterPhaseCount--;
+ }
+ }
+
+ // convert delta time to seconds and clamp to [0, 1].
+ context->constants.deltaTime = FFX_MAXIMUM(0.0f, FFX_MINIMUM(1.0f, params->frameTimeDelta / 1000.0f));
+
+ if (resetAccumulation) {
+ context->constants.frameIndex = 0.0f;
+ } else {
+ context->constants.frameIndex += 1.0f;
+ }
+
+ // GODOT BEGINS
+ memcpy(context->constants.reprojectionMatrix, params->reprojectionMatrix, sizeof(context->constants.reprojectionMatrix));
+ // GODOT ENDS
+
+ // dispatch dimensions: number of 8x8 work regions covering the render size, the upscale size and half the render size
+ const int32_t threadGroupWorkRegionDim = 8;
+ const int32_t dispatchSrcX = (context->constants.renderSize[0] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+ const int32_t dispatchSrcY = (context->constants.renderSize[1] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+ const int32_t dispatchDstX = (context->constants.upscaleSize[0] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+ const int32_t dispatchDstY = (context->constants.upscaleSize[1] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+ const int32_t dispatchShadingChangePassX = (int32_t(context->constants.renderSize[0] * 0.5f) + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+ const int32_t dispatchShadingChangePassY = (int32_t(context->constants.renderSize[1] * 0.5f) + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+
+ // On reset, clear the accumulation SRV target, the SPD mips (scene luminance) and the frame info; reconstructed depth is cleared separately below.
+ if (resetAccumulation) {
+
+ FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT };
+ wcscpy_s(clearJob.jobLabel, L"Clear Resource");
+
+ const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f };
+ memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float));
+ clearJob.clearJobDescriptor.target = context->srvResources[accumulationSrvResourceIndex];
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
+
+ wcscpy_s(clearJob.jobLabel, L"Clear Scene Luminance");
+ clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS];
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
+
+ // Auto exposure always used to track luma changes in locking logic
+ {
+ const float clearValuesExposure[]{ -1.f, 1.f, 0.f, 0.f };
+ memcpy(clearJob.clearJobDescriptor.color, clearValuesExposure, 4 * sizeof(float));
+ wcscpy_s(clearJob.jobLabel, L"Clear Frame Info");
+ clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO];
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
+ }
+ }
+
+ {
+ FfxGpuJobDescription clearJob = {FFX_GPU_JOB_CLEAR_FLOAT};
+ // FSR3: need to clear here since we need the content of this surface for frameinterpolation
+ // so clearing in the lock pass is not an option
+ const bool bInverted = (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED) == FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED;
+ const float clearDepthValue[]{bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f};
+ memcpy(clearJob.clearJobDescriptor.color, clearDepthValue, 4 * sizeof(float));
+ wcscpy_s(clearJob.jobLabel, L"Clear Reconstructed Previous Nearest Depth");
+ clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH];
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
+ }
+
+ // Suggested by Enduring to resolve issues with running FSR3 on console via the RHI backend in the plugin as this resource won't be cleared to 0 by default.
+ {
+ FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT };
+ wcscpy_s(clearJob.jobLabel, L"Clear Spd Atomic Count");
+ const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f };
+ memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float));
+ clearJob.clearJobDescriptor.target = context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT];
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
+ }
+
+ // Auto exposure
+ uint32_t dispatchThreadGroupCountXY[2];
+ uint32_t workGroupOffset[2];
+ uint32_t numWorkGroupsAndMips[2];
+ uint32_t rectInfo[4] = { 0, 0, params->renderSize.width, params->renderSize.height };
+ ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo);
+
+ // downsample
+ Fsr3UpscalerSpdConstants luminancePyramidConstants;
+ luminancePyramidConstants.numworkGroups = numWorkGroupsAndMips[0];
+ luminancePyramidConstants.mips = numWorkGroupsAndMips[1];
+ luminancePyramidConstants.workGroupOffset[0] = workGroupOffset[0];
+ luminancePyramidConstants.workGroupOffset[1] = workGroupOffset[1];
+ luminancePyramidConstants.renderSize[0] = params->renderSize.width;
+ luminancePyramidConstants.renderSize[1] = params->renderSize.height;
+
+ // compute the constants.
+ Fsr3UpscalerRcasConstants rcasConsts = {};
+ const float sharpenessRemapped = (-2.0f * params->sharpness) + 2.0f;
+ FsrRcasCon(rcasConsts.rcasConfig, sharpenessRemapped);
+
+ // initialize constantBuffers data
+ context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, &context->constants, sizeof(context->constants), &context->constantBuffers[FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_FSR3UPSCALER]);
+ context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, &luminancePyramidConstants, sizeof(luminancePyramidConstants), &context->constantBuffers[FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_SPD]);
+ context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, &rcasConsts, sizeof(rcasConsts), &context->constantBuffers[FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_RCAS]);
+
+ {
+ FfxResourceInternal aliasableResources[] = {
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1],
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SHADING_CHANGE],
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS],
+ // SPD_MIPS are an aliasable resource, but need to be cleared to prevent reading pixels that have never been written
+ //context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS],
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH_MIP1],
+ context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS],
+ };
+ for(int i = 0; i<_countof(aliasableResources); ++i)
+ {
+ FfxGpuJobDescription discardJob = { FFX_GPU_JOB_DISCARD };
+ discardJob.discardJobDescriptor.target = aliasableResources[i];
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &discardJob);
+ }
+ // SPD counter needs to be cleared
+ {
+ FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT };
+ wcscpy_s(clearJob.jobLabel, L"Clear Spd Atomic Count");
+ const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f };
+ memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float));
+ clearJob.clearJobDescriptor.target = context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS];
+ context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob);
+ }
+ }
+
+ scheduleDispatch(context, params, &context->pipelinePrepareInputs, dispatchSrcX, dispatchSrcY);
+ scheduleDispatch(context, params, &context->pipelineLumaPyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]);
+ scheduleDispatch(context, params, &context->pipelineShadingChangePyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]);
+ scheduleDispatch(context, params, &context->pipelineShadingChange, dispatchShadingChangePassX, dispatchShadingChangePassY);
+ scheduleDispatch(context, params, &context->pipelinePrepareReactivity, dispatchSrcX, dispatchSrcY);
+ scheduleDispatch(context, params, &context->pipelineLumaInstability, dispatchSrcX, dispatchSrcY);
+
+ scheduleDispatch(context, params, params->enableSharpening ? &context->pipelineAccumulateSharpen : &context->pipelineAccumulate, dispatchDstX, dispatchDstY);
+
+ // RCAS
+ if (params->enableSharpening)
+ {
+
+ // dispatch RCAS
+ const int32_t threadGroupWorkRegionDimRCAS = 16;
+ const int32_t dispatchX = (context->constants.upscaleSize[0] + (threadGroupWorkRegionDimRCAS - 1)) / threadGroupWorkRegionDimRCAS;
+ const int32_t dispatchY = (context->constants.upscaleSize[1] + (threadGroupWorkRegionDimRCAS - 1)) / threadGroupWorkRegionDimRCAS;
+ scheduleDispatch(context, params, &context->pipelineRCAS, dispatchX, dispatchY);
+ }
+
+ if (params->flags & FFX_FSR3UPSCALER_DISPATCH_DRAW_DEBUG_VIEW) {
+ scheduleDispatch(context, params, &context->pipelineDebugView, dispatchDstX, dispatchDstY);
+ }
+
+ context->resourceFrameIndex = (context->resourceFrameIndex + 1) % FSR3UPSCALER_MAX_QUEUED_FRAMES;
+
+ // Fsr3UpscalerMaxQueuedFrames must be an even number.
+ FFX_STATIC_ASSERT((FSR3UPSCALER_MAX_QUEUED_FRAMES & 1) == 0);
+
+ context->contextDescription.backendInterface.fpExecuteGpuJobs(&context->contextDescription.backendInterface, commandList, context->effectContextId);
+
+ // release dynamic resources
+ context->contextDescription.backendInterface.fpUnregisterResources(&context->contextDescription.backendInterface, commandList, context->effectContextId);
+
+ return FFX_OK;
+}
+
+FFX_API FfxErrorCode ffxFsr3UpscalerContextCreate(FfxFsr3UpscalerContext* context, const FfxFsr3UpscalerContextDescription* contextDescription)
+{
+ // reject a null context first: the memset below would otherwise write through it.
+ FFX_RETURN_ON_ERROR(
+ context,
+ FFX_ERROR_INVALID_POINTER);
+
+ // zero context memory (still done before the remaining checks, so a failed create leaves it cleared).
+ memset(context, 0, sizeof(FfxFsr3UpscalerContext));
+ FFX_RETURN_ON_ERROR(
+ contextDescription,
+ FFX_ERROR_INVALID_POINTER);
+
+ // validate that the core backend callbacks are set (only these four are checked here).
+ FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE);
+ FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE);
+ FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+ FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+
+ // if a scratch buffer is declared, then it must come with a non-zero size
+ if (contextDescription->backendInterface.scratchBuffer) {
+
+ FFX_RETURN_ON_ERROR(contextDescription->backendInterface.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE);
+ }
+
+ // compile-time check: the public context must be large enough to hold the internal context cast onto it below.
+ FFX_STATIC_ASSERT(sizeof(FfxFsr3UpscalerContext) >= sizeof(FfxFsr3UpscalerContext_Private));
+
+ // create the context in place and return whatever error code fsr3upscalerCreate reports.
+ FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context);
+ const FfxErrorCode errorCode = fsr3upscalerCreate(contextPrivate, contextDescription);
+
+ return errorCode;
+}
+
+FFX_API FfxErrorCode ffxFsr3UpscalerContextGetGpuMemoryUsage(FfxFsr3UpscalerContext* context, FfxEffectMemoryUsage* vramUsage)
+{
+ FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); // both pointers are required
+ FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER);
+ FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context); // view the opaque context as its private layout
+
+ FFX_RETURN_ON_ERROR(contextPrivate->device, FFX_ERROR_NULL_DEVICE); // a null device is reported as FFX_ERROR_NULL_DEVICE
+
+ FfxErrorCode errorCode = contextPrivate->contextDescription.backendInterface.fpGetEffectGpuMemoryUsage( // NOTE(review): this callback is not among those validated in ffxFsr3UpscalerContextCreate
+ &contextPrivate->contextDescription.backendInterface, contextPrivate->effectContextId, vramUsage);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); // pass the backend's error code through unchanged
+
+ return FFX_OK;
+}
+
+FfxErrorCode ffxFsr3UpscalerContextDestroy(FfxFsr3UpscalerContext* context)
+{
+ FFX_RETURN_ON_ERROR(
+ context,
+ FFX_ERROR_INVALID_POINTER); // a null context is rejected with FFX_ERROR_INVALID_POINTER
+
+ // destroy the context: hand the private view to fsr3upscalerRelease and return its error code as-is.
+ FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context);
+ const FfxErrorCode errorCode = fsr3upscalerRelease(contextPrivate);
+ return errorCode;
+}
+
+FfxErrorCode ffxFsr3UpscalerContextDispatch(FfxFsr3UpscalerContext* context, const FfxFsr3UpscalerDispatchDescription* dispatchParams)
+{
+ FFX_RETURN_ON_ERROR(
+ context,
+ FFX_ERROR_INVALID_POINTER); // both the context and the dispatch description are required
+ FFX_RETURN_ON_ERROR(
+ dispatchParams,
+ FFX_ERROR_INVALID_POINTER);
+
+ FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context);
+
+ // validate that renderSize and upscaleSize do not exceed the maxima stored in the context description.
+ FFX_RETURN_ON_ERROR(
+ dispatchParams->renderSize.width <= contextPrivate->contextDescription.maxRenderSize.width,
+ FFX_ERROR_OUT_OF_RANGE);
+ FFX_RETURN_ON_ERROR(
+ dispatchParams->renderSize.height <= contextPrivate->contextDescription.maxRenderSize.height,
+ FFX_ERROR_OUT_OF_RANGE);
+ FFX_RETURN_ON_ERROR(
+ dispatchParams->upscaleSize.width <= contextPrivate->contextDescription.maxUpscaleSize.width,
+ FFX_ERROR_OUT_OF_RANGE);
+ FFX_RETURN_ON_ERROR(
+ dispatchParams->upscaleSize.height <= contextPrivate->contextDescription.maxUpscaleSize.height,
+ FFX_ERROR_OUT_OF_RANGE);
+ FFX_RETURN_ON_ERROR(
+ contextPrivate->device,
+ FFX_ERROR_NULL_DEVICE); // a null device is reported as FFX_ERROR_NULL_DEVICE
+
+ // dispatch the FSR3 passes; the error code of fsr3upscalerDispatch is returned unchanged.
+ const FfxErrorCode errorCode = fsr3upscalerDispatch(contextPrivate, dispatchParams);
+ return errorCode;
+}
+
+FFX_API float ffxFsr3UpscalerGetUpscaleRatioFromQualityMode(FfxFsr3UpscalerQualityMode qualityMode) // maps a quality mode to its per-dimension upscaling ratio
+{
+ switch (qualityMode) {
+ case FFX_FSR3UPSCALER_QUALITY_MODE_NATIVEAA:
+ return 1.0f;
+ case FFX_FSR3UPSCALER_QUALITY_MODE_QUALITY:
+ return 1.5f;
+ case FFX_FSR3UPSCALER_QUALITY_MODE_BALANCED:
+ return 1.7f;
+ case FFX_FSR3UPSCALER_QUALITY_MODE_PERFORMANCE:
+ return 2.0f;
+ case FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE:
+ return 3.0f;
+ default:
+ return 0.0f; // unrecognised mode: 0.0f is not a usable ratio, so callers that divide by it must validate the mode first
+ }
+}
+
+FFX_API FfxErrorCode ffxFsr3UpscalerGetRenderResolutionFromQualityMode(
+ uint32_t* renderWidth,
+ uint32_t* renderHeight,
+ uint32_t displayWidth,
+ uint32_t displayHeight,
+ FfxFsr3UpscalerQualityMode qualityMode)
+{
+ FFX_RETURN_ON_ERROR(
+ renderWidth,
+ FFX_ERROR_INVALID_POINTER); // both output pointers are required
+ FFX_RETURN_ON_ERROR(
+ renderHeight,
+ FFX_ERROR_INVALID_POINTER);
+ FFX_RETURN_ON_ERROR(
+ FFX_FSR3UPSCALER_QUALITY_MODE_NATIVEAA <= qualityMode && qualityMode <= FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE,
+ FFX_ERROR_INVALID_ENUM); // range check keeps the ratio lookup below away from its 0.0f default
+
+ // divide each display dimension by the mode's ratio; the uint32_t cast truncates the float result toward zero.
+ const float ratio = ffxFsr3UpscalerGetUpscaleRatioFromQualityMode(qualityMode);
+ const uint32_t scaledDisplayWidth = (uint32_t)((float)displayWidth / ratio);
+ const uint32_t scaledDisplayHeight = (uint32_t)((float)displayHeight / ratio);
+ *renderWidth = scaledDisplayWidth;
+ *renderHeight = scaledDisplayHeight;
+
+ return FFX_OK;
+}
+
+int32_t ffxFsr3UpscalerGetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth) // NOTE(review): renderWidth == 0 is not guarded; the float division below would then yield inf/NaN
+{
+ const float basePhaseCount = 8.0f; // phase count at a 1:1 display-to-render width ratio
+ const int32_t jitterPhaseCount = int32_t(basePhaseCount * pow((float(displayWidth) / renderWidth), 2.0f)); // 8 * (displayWidth / renderWidth)^2, truncated toward zero
+ return jitterPhaseCount;
+}
+
+FfxErrorCode ffxFsr3UpscalerGetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount)
+{
+ FFX_RETURN_ON_ERROR(
+ outX,
+ FFX_ERROR_INVALID_POINTER); // both output pointers are required
+ FFX_RETURN_ON_ERROR(
+ outY,
+ FFX_ERROR_INVALID_POINTER);
+ FFX_RETURN_ON_ERROR(
+ phaseCount > 0,
+ FFX_ERROR_INVALID_ARGUMENT); // also keeps the modulo below from dividing by zero
+
+ const float x = halton((index % phaseCount) + 1, 2) - 0.5f; // index wraps every phaseCount frames; base 2 for x. NOTE(review): a negative index gives a negative remainder - confirm halton() accepts it
+ const float y = halton((index % phaseCount) + 1, 3) - 0.5f; // base 3 for y; presumably halton() returns [0, 1), so the offset is recentred around zero - confirm
+
+ *outX = x;
+ *outY = y;
+ return FFX_OK;
+}
+
+FFX_API bool ffxFsr3UpscalerResourceIsNull(FfxResource resource)
+{
+ return resource.resource == NULL; // true when the wrapped resource pointer is unset; no other FfxResource field is inspected
+}
+
+FfxErrorCode ffxFsr3UpscalerContextGenerateReactiveMask(FfxFsr3UpscalerContext* context, const FfxFsr3UpscalerGenerateReactiveDescription* params)
+{
+ FFX_RETURN_ON_ERROR(
+ context,
+ FFX_ERROR_INVALID_POINTER); // both the context and the description are required
+ FFX_RETURN_ON_ERROR(
+ params,
+ FFX_ERROR_INVALID_POINTER);
+ // GODOT BEGINS
+ // Godot doesn't use FFX context to pass command list.
+ // So we don't need to ensure that the command list is not null.
+ // FFX_RETURN_ON_ERROR(
+ // params->commandList,
+ // FFX_ERROR_INVALID_POINTER);
+ // GODOT ENDS
+
+ FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context);
+
+ FFX_RETURN_ON_ERROR(
+ contextPrivate->device,
+ FFX_ERROR_NULL_DEVICE);
+
+ // take a short cut to the command list (may be null, see the GODOT note above)
+ FfxCommandList commandList = params->commandList;
+
+ FfxPipelineState* pipeline = &contextPrivate->pipelineGenerateReactive;
+
+ const int32_t threadGroupWorkRegionDim = 8; // each thread group covers an 8x8 region of the render-size image
+ const int32_t dispatchSrcX = (params->renderSize.width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; // ceil(width / 8)
+ const int32_t dispatchSrcY = (params->renderSize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; // ceil(height / 8)
+
+ FfxComputeJobDescription jobDescriptor = {};
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, &params->colorOpaqueOnly, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]);
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, &params->colorPreUpscale, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+ contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, &params->outReactive, contextPrivate->effectContextId, &contextPrivate->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE]);
+
+ jobDescriptor.uavTextures[0].resource = contextPrivate->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE]; // the generated mask is the pass's only UAV output
+
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.srvTextures[0].name, pipeline->srvTextureBindings[0].name);
+ wcscpy_s(jobDescriptor.srvTextures[1].name, pipeline->srvTextureBindings[1].name);
+ wcscpy_s(jobDescriptor.uavTextures[0].name, pipeline->uavTextureBindings[0].name);
+#endif
+
+ jobDescriptor.dimensions[0] = dispatchSrcX;
+ jobDescriptor.dimensions[1] = dispatchSrcY;
+ jobDescriptor.dimensions[2] = 1;
+ jobDescriptor.pipeline = *pipeline;
+
+ for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) { // bind each SRV the pipeline declares from the registered resources
+
+ const uint32_t currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier;
+ const FfxResourceInternal currentResource = contextPrivate->srvResources[currentResourceId];
+ jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource;
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name);
+#endif
+ }
+
+ Fsr3UpscalerGenerateReactiveConstants genReactiveConsts = {}; // caller-supplied tuning values, copied verbatim into the pass constants
+ genReactiveConsts.scale = params->scale;
+ genReactiveConsts.threshold = params->cutoffThreshold;
+ genReactiveConsts.binaryValue = params->binaryValue;
+ genReactiveConsts.flags = params->flags;
+
+ contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&contextPrivate->contextDescription.backendInterface, &genReactiveConsts, sizeof(genReactiveConsts), &contextPrivate->constantBuffers[FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE]);
+
+ for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex)
+ {
+#ifdef FFX_DEBUG
+ wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name);
+#endif
+ jobDescriptor.cbs[currentRootConstantIndex] = contextPrivate->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier];
+ }
+
+ FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE };
+ dispatchJob.computeJobDescriptor = jobDescriptor;
+
+ contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &dispatchJob);
+
+ contextPrivate->contextDescription.backendInterface.fpExecuteGpuJobs(&contextPrivate->contextDescription.backendInterface, commandList, contextPrivate->effectContextId);
+
+ // release dynamic resources (the three resources registered above)
+ contextPrivate->contextDescription.backendInterface.fpUnregisterResources(&contextPrivate->contextDescription.backendInterface, commandList, contextPrivate->effectContextId);
+
+ return FFX_OK;
+}
+
+FFX_API FfxVersionNumber ffxFsr3UpscalerGetEffectVersion()
+{
+ return FFX_SDK_MAKE_VERSION(FFX_FSR3UPSCALER_VERSION_MAJOR, FFX_FSR3UPSCALER_VERSION_MINOR, FFX_FSR3UPSCALER_VERSION_PATCH); // packs the compile-time major/minor/patch macros into one version number
+}
+
+FFX_API FfxErrorCode ffxFsr3UpscalerSetConstant(FfxFsr3UpscalerContext* context, FfxFsr3UpscalerConfigureKey key, void* valuePtr)
+{
+ FFX_RETURN_ON_ERROR(
+ context,
+ FFX_ERROR_INVALID_POINTER); // valuePtr may be null: each key then falls back to its default below
+
+ FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context);
+ switch (key)
+ {
+ case FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FVELOCITYFACTOR:
+ {
+ float fValue = 1.0f; // default when valuePtr is null
+ if (valuePtr != nullptr)
+ {
+ fValue = *(static_cast<float*>(valuePtr));
+ }
+ contextPrivate->constants.velocityFactor = ffxSaturate(fValue);
+ break;
+ }
+ case FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FREACTIVENESSSCALE:
+ {
+ float fValue = 1.0f; // default when valuePtr is null
+ if (valuePtr != nullptr)
+ {
+ fValue = *(static_cast<float*>(valuePtr));
+ }
+ contextPrivate->constants.reactivenessScale = ffxMax(0.f,fValue); // negative values are raised to 0
+ break;
+ }
+ case FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FSHADINGCHANGESCALE:
+ {
+ float fValue = 1.0f; // default when valuePtr is null
+ if (valuePtr != nullptr)
+ {
+ fValue = *(static_cast<float*>(valuePtr));
+ }
+ contextPrivate->constants.shadingChangeScale = ffxMax(0.f,fValue); // negative values are raised to 0
+ break;
+ }
+ case FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FACCUMULATIONADDEDPERFRAME:
+ {
+ float fValue = 1.0f/3.0f; // default when valuePtr is null
+ if (valuePtr != nullptr)
+ {
+ fValue = *(static_cast<float*>(valuePtr));
+ }
+ contextPrivate->constants.accumulationAddedPerFrame = ffxSaturate(fValue);
+ break;
+ }
+ case FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FMINDISOCCLUSIONACCUMULATION:
+ {
+ float fValue = -1.0f/3.0f; // default when valuePtr is null
+ if (valuePtr != nullptr)
+ {
+ fValue = *(static_cast<float*>(valuePtr));
+ }
+ contextPrivate->constants.minDisocclusionAccumulation = ffxMin(1.0f, ffxMax(-1.0f, fValue)); // clamped to [-1, 1]
+ break;
+ }
+ default:
+ return FFX_ERROR_INVALID_ENUM; // unknown key: no constant is modified
+ }
+ return FFX_OK;
+}
+
+FFX_API FfxErrorCode ffxFsr3UpscalerSetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel)
+{
+ ffxSetPrintMessageCallback(fpMessage, debugLevel); // forwards both arguments unchanged; takes no context, so the setting is not tied to one
+ return FFX_OK; // always succeeds: the arguments are not validated here
+}
diff --git a/thirdparty/amd-ffx/ffx_fsr3upscaler.h b/thirdparty/amd-ffx/ffx_fsr3upscaler.h
new file mode 100644
index 000000000000..e4646895961c
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_fsr3upscaler.h
@@ -0,0 +1,597 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+// Include the interface for the backend of the FSR3 API.
+#include "ffx_interface.h"
+
+/// @defgroup ffxFsr3Upscaler FidelityFX FSR3
+/// FidelityFX Super Resolution 3 runtime library
+///
+/// @ingroup SDKComponents
+
+/// FidelityFX Super Resolution 3 major version.
+///
+/// @ingroup ffxFsr3Upscaler
+#define FFX_FSR3UPSCALER_VERSION_MAJOR (3)
+
+/// FidelityFX Super Resolution 3 minor version.
+///
+/// @ingroup ffxFsr3Upscaler
+#define FFX_FSR3UPSCALER_VERSION_MINOR (1)
+
+/// FidelityFX Super Resolution 3 patch version.
+///
+/// @ingroup ffxFsr3Upscaler
+#define FFX_FSR3UPSCALER_VERSION_PATCH (4)
+
+/// FidelityFX Super Resolution 3 context count
+///
+/// Defines the number of internal effect contexts required by FSR3
+///
+/// @ingroup ffxFsr3Upscaler
+#define FFX_FSR3UPSCALER_CONTEXT_COUNT 1
+
+/// The size of the context specified in 32bit values.
+///
+/// @ingroup ffxFsr3Upscaler
+#define FFX_FSR3UPSCALER_CONTEXT_SIZE (FFX_SDK_DEFAULT_CONTEXT_SIZE)
+
+#if defined(__cplusplus)
+extern "C" {
+#endif // #if defined(__cplusplus)
+
+/// An enumeration of all the passes which constitute the FSR3 algorithm.
+///
+/// FSR3 is implemented as a composite of several compute passes each
+/// computing a key part of the final result. Each call to the
+/// FfxFsr3UpscalerScheduleGpuJobFunc callback function will
+/// correspond to a single pass included in FfxFsr3UpscalerPass. For a
+/// more comprehensive description of each pass, please refer to the FSR3
+/// reference documentation.
+///
+/// Please note in some cases e.g.: FFX_FSR3UPSCALER_PASS_ACCUMULATE
+/// and FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN either one pass or the
+/// other will be used (they are mutually exclusive). The choice of which will
+/// depend on the way the FfxFsr3UpscalerContext is created and the
+/// precise contents of FfxFsr3UpscalerDispatchDescription each time a call
+/// is made to ffxFsr3UpscalerContextDispatch.
+///
+/// @ingroup ffxFsr3Upscaler
+typedef enum FfxFsr3UpscalerPass
+{
+ FFX_FSR3UPSCALER_PASS_PREPARE_INPUTS, ///< A pass which prepares game inputs for later passes
+ FFX_FSR3UPSCALER_PASS_LUMA_PYRAMID, ///< A pass which generates the luminance mipmap chain for the current frame.
+ FFX_FSR3UPSCALER_PASS_SHADING_CHANGE_PYRAMID, ///< A pass which generates the shading change detection mipmap chain for the current frame.
+ FFX_FSR3UPSCALER_PASS_SHADING_CHANGE, ///< A pass which estimates shading changes for the current frame
+ FFX_FSR3UPSCALER_PASS_PREPARE_REACTIVITY, ///< A pass which prepares accumulation relevant information
+ FFX_FSR3UPSCALER_PASS_LUMA_INSTABILITY, ///< A pass which estimates temporal instability of the luminance changes.
+ FFX_FSR3UPSCALER_PASS_ACCUMULATE, ///< A pass which performs upscaling.
+ FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN, ///< A pass which performs upscaling when sharpening is used.
+ FFX_FSR3UPSCALER_PASS_RCAS, ///< A pass which performs sharpening.
+ FFX_FSR3UPSCALER_PASS_DEBUG_VIEW, ///< A pass which draws some internal resources, for debugging purposes
+
+ FFX_FSR3UPSCALER_PASS_GENERATE_REACTIVE, ///< An optional pass to generate a reactive mask.
+ FFX_FSR3UPSCALER_PASS_TCR_AUTOGENERATE, ///< DEPRECATED - NO LONGER SUPPORTED
+ FFX_FSR3UPSCALER_PASS_COUNT ///< The number of enumerators above (sentinel value, not a pass itself).
+} FfxFsr3UpscalerPass;
+
+/// An enumeration of all the quality modes supported by FidelityFX Super
+/// Resolution 3 upscaling.
+///
+/// In order to provide a consistent user experience across multiple
+/// applications which implement FSR3, it is strongly recommended that the
+/// following preset scaling factors are made available through your
+/// application's user interface.
+///
+/// If your application does not expose the notion of preset scaling factors
+/// for upscaling algorithms (perhaps instead implementing a fixed ratio which
+/// is immutable) or implementing a more dynamic scaling scheme (such as
+/// dynamic resolution scaling), then there is no need to use these presets.
+///
+/// Please note that FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE is
+/// an optional mode which may introduce significant quality degradation in the
+/// final image. As such it is recommended that you evaluate the final results
+/// of using this scaling mode before deciding if you should include it in your
+/// application.
+///
+/// @ingroup ffxFsr3Upscaler
+typedef enum FfxFsr3UpscalerQualityMode {
+ FFX_FSR3UPSCALER_QUALITY_MODE_NATIVEAA = 0, ///< Perform upscaling with a per-dimension upscaling ratio of 1.0x.
+ FFX_FSR3UPSCALER_QUALITY_MODE_QUALITY = 1, ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x.
+ FFX_FSR3UPSCALER_QUALITY_MODE_BALANCED = 2, ///< Perform upscaling with a per-dimension upscaling ratio of 1.7x.
+ FFX_FSR3UPSCALER_QUALITY_MODE_PERFORMANCE = 3, ///< Perform upscaling with a per-dimension upscaling ratio of 2.0x.
+ FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE = 4 ///< Perform upscaling with a per-dimension upscaling ratio of 3.0x.
+} FfxFsr3UpscalerQualityMode;
+
+/// An enumeration of single-bit flags used when creating a
+/// FfxFsr3UpscalerContext. They are collected in FfxFsr3UpscalerContextDescription::flags.
+///
+/// @ingroup ffxFsr3Upscaler
+typedef enum FfxFsr3UpscalerInitializationFlagBits {
+
+ FFX_FSR3UPSCALER_ENABLE_HIGH_DYNAMIC_RANGE = (1<<0), ///< A bit indicating if the input color data provided is using a high-dynamic range.
+ FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS = (1<<1), ///< A bit indicating if the motion vectors are rendered at display resolution.
+ FFX_FSR3UPSCALER_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION = (1<<2), ///< A bit indicating that the motion vectors have the jittering pattern applied to them.
+ FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED = (1<<3), ///< A bit indicating that the input depth buffer data provided is inverted [1..0].
+ FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE = (1<<4), ///< A bit indicating that the input depth buffer data provided is using an infinite far plane.
+ FFX_FSR3UPSCALER_ENABLE_AUTO_EXPOSURE = (1<<5), ///< A bit indicating if automatic exposure should be applied to input color data.
+ FFX_FSR3UPSCALER_ENABLE_DYNAMIC_RESOLUTION = (1<<6), ///< A bit indicating that the application uses dynamic resolution scaling.
+ FFX_FSR3UPSCALER_ENABLE_TEXTURE1D_USAGE = (1<<7), ///< This value is deprecated, but remains in order to aid upgrading from older versions of FSR3.
+ FFX_FSR3UPSCALER_ENABLE_DEBUG_CHECKING = (1<<8), ///< A bit indicating that the runtime should check some API values and report issues.
+} FfxFsr3UpscalerInitializationFlagBits;
+
+/// Callback signature used to pass a string message to the application.
+///
+/// Used for debug messages; see FfxFsr3UpscalerContextDescription::fpMessage.
+///
+/// @param [in] type The type of message.
+/// @param [in] message A wide-character string message to pass.
+///
+///
+/// @ingroup ffxFsr3Upscaler
+typedef void(*FfxFsr3UpscalerMessage)(
+ FfxMsgType type,
+ const wchar_t* message);
+
+/// A structure encapsulating the parameters required to initialize FidelityFX
+/// Super Resolution 3 upscaling.
+///
+/// @ingroup ffxFsr3Upscaler
+typedef struct FfxFsr3UpscalerContextDescription {
+
+ uint32_t flags; ///< A collection of FfxFsr3UpscalerInitializationFlagBits.
+ FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at; ffxFsr3UpscalerContextDispatch rejects a larger renderSize.
+ FfxDimensions2D maxUpscaleSize; ///< The maximum output size targeted by the upscaling process; ffxFsr3UpscalerContextDispatch rejects a larger upscaleSize.
+ FfxFsr3UpscalerMessage fpMessage; ///< A pointer to a function that can receive messages from the runtime.
+ FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FidelityFX SDK
+
+} FfxFsr3UpscalerContextDescription;
+
+typedef enum FfxFsr3UpscalerDispatchFlags // bit flags tested against the dispatch description's flags field
+{
+ FFX_FSR3UPSCALER_DISPATCH_DRAW_DEBUG_VIEW = (1 << 0), ///< A bit requesting the extra debug view pass at the end of a dispatch; the output resource will contain debug views with relevant information.
+} FfxFsr3UpscalerDispatchFlags;
+
+typedef enum FfxFsr3UpscalerConfigureKey // keys accepted by ffxFsr3UpscalerSetConstant; a null value pointer selects the key's default
+{
+ FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FVELOCITYFACTOR = 0, // Override constant buffer fVelocityFactor. The float value is read through the void* value pointer. Value of 0.0f can improve temporal stability of bright pixels. Default value is 1.0f. Value is clamped to [0.0f, 1.0f].
+ FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FREACTIVENESSSCALE = 1, // Override constant buffer fReactivenessScale. The float value is read through the void* value pointer. Meant for development purposes, to test whether writing a larger value to the reactive mask reduces ghosting. Default value is 1.0f. Value is clamped to [0.0f, +infinity].
+ FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FSHADINGCHANGESCALE =2, // Override fShadingChangeScale. Increasing this scales the fsr3.1 computed shading change value at read to have higher reactiveness. Default value is 1.0f. Value is clamped to [0.0f, +infinity].
+ FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FACCUMULATIONADDEDPERFRAME = 3, // Override constant buffer fAccumulationAddedPerFrame. Corresponds to the amount of accumulation added per frame at a pixel coordinate where disocclusion occurred or where the reactive mask value is > 0.0f. Decreasing this and drawing the ghosting object (i.e. one with no motion vectors) to the reactive mask with a value close to 1.0f can decrease temporal ghosting. Decreasing this value could result in more thin-feature pixels flickering. Default value is 0.333. Value is clamped to [0.0f, 1.0f].
+ FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FMINDISOCCLUSIONACCUMULATION = 4, // Override constant buffer fMinDisocclusionAccumulation. Increasing this value may reduce white pixel temporal flickering around swaying thin objects that are disoccluding one another often. Too high a value may increase ghosting. A sufficiently negative value means that, for a pixel coordinate disoccluded at frame N, fAccumulationAddedPerFrame is added starting at frame N+2. Default value is -0.333. Value is clamped to [-1.0f, 1.0f].
+} FfxFsr3UpscalerConfigureKey;
+
+/// A structure encapsulating the parameters for dispatching the various passes
+/// of FidelityFX Super Resolution 3; passed to ffxFsr3UpscalerContextDispatch.
+///
+/// @ingroup ffxFsr3Upscaler
+typedef struct FfxFsr3UpscalerDispatchDescription {
+
+ FfxCommandList commandList; ///< The FfxCommandList to record FSR3 rendering commands into.
+ FfxResource color; ///< A FfxResource containing the color buffer for the current frame (at render resolution).
+ FfxResource depth; ///< A FfxResource containing 32bit depth values for the current frame (at render resolution).
+ FfxResource motionVectors; ///< A FfxResource containing 2-dimensional motion vectors (at render resolution if FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS is not set).
+ FfxResource exposure; ///< An optional FfxResource containing a 1x1 exposure value.
+ FfxResource reactive; ///< An optional FfxResource containing alpha value of reactive objects in the scene.
+ FfxResource transparencyAndComposition; ///< An optional FfxResource containing alpha value of special objects in the scene.
+ FfxResource dilatedDepth; ///< A FfxResource allocated as described in FfxFsr3UpscalerSharedResourceDescriptions that is used to emit dilated depth and share with following effects.
+ FfxResource dilatedMotionVectors; ///< A FfxResource allocated as described in FfxFsr3UpscalerSharedResourceDescriptions that is used to emit dilated motion vectors and share with following effects.
+ FfxResource reconstructedPrevNearestDepth; ///< A FfxResource allocated as described in FfxFsr3UpscalerSharedResourceDescriptions that is used to emit reconstructed previous nearest depth and share with following effects.
+ FfxResource output; ///< A FfxResource containing the output color buffer for the current frame (at presentation resolution).
+ FfxFloatCoords2D jitterOffset; ///< The subpixel jitter offset applied to the camera.
+ FfxFloatCoords2D motionVectorScale; ///< The scale factor to apply to motion vectors.
+ FfxDimensions2D renderSize; ///< The resolution that was used for rendering the input resources.
+ FfxDimensions2D upscaleSize; ///< The resolution that the upscaler will output.
+ bool enableSharpening; ///< Enable an additional sharpening pass.
+ float sharpness; ///< The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness.
+ float frameTimeDelta; ///< The time elapsed since the last frame (expressed in milliseconds).
+ float preExposure; ///< The pre exposure value (must be > 0.0f).
+ bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously.
+ float cameraNear; ///< The distance to the near plane of the camera.
+ float cameraFar; ///< The distance to the far plane of the camera.
+ float cameraFovAngleVertical; ///< The camera angle field of view in the vertical direction (expressed in radians).
+ float viewSpaceToMetersFactor; ///< The scale factor to convert view space units to meters.
+ uint32_t flags; ///< A combination of FfxFsr3UpscalerDispatchFlags.
+
+ // GODOT BEGINS
+ float reprojectionMatrix[16]; ///< Godot-specific addition: 16 floats, presumably a 4x4 reprojection matrix (element order and convention are not shown here; TODO confirm against the caller).
+ // GODOT ENDS
+
+} FfxFsr3UpscalerDispatchDescription;
+
+/// A structure encapsulating the parameters for automatic generation of a reactive mask; passed to ffxFsr3UpscalerContextGenerateReactiveMask.
+///
+/// @ingroup ffxFsr3Upscaler
+typedef struct FfxFsr3UpscalerGenerateReactiveDescription {
+
+ FfxCommandList commandList; ///< The FfxCommandList to record FSR3 rendering commands into.
+ FfxResource colorOpaqueOnly; ///< A FfxResource containing the opaque-only color buffer for the current frame (at render resolution).
+ FfxResource colorPreUpscale; ///< A FfxResource containing the opaque+translucent color buffer for the current frame (at render resolution).
+ FfxResource outReactive; ///< A FfxResource containing the surface to generate the reactive mask into.
+ FfxDimensions2D renderSize; ///< The resolution that was used for rendering the input resources.
+ float scale; ///< A value to scale the output.
+ float cutoffThreshold; ///< A threshold value to generate a binary reactive mask.
+ float binaryValue; ///< A value to set for the binary reactive mask.
+ uint32_t flags; ///< Flags to determine how to generate the reactive mask.
+} FfxFsr3UpscalerGenerateReactiveDescription;
+
+/// A structure encapsulating the resource descriptions for shared resources for this effect; populated by ffxFsr3UpscalerGetSharedResourceDescriptions.
+///
+/// @ingroup ffxFsr3Upscaler
+typedef struct FfxFsr3UpscalerSharedResourceDescriptions {
+
+ FfxCreateResourceDescription reconstructedPrevNearestDepth; ///< The FfxCreateResourceDescription for allocating the reconstructedPrevNearestDepth shared resource.
+ FfxCreateResourceDescription dilatedDepth; ///< The FfxCreateResourceDescription for allocating the dilatedDepth shared resource.
+ FfxCreateResourceDescription dilatedMotionVectors; ///< The FfxCreateResourceDescription for allocating the dilatedMotionVectors shared resource.
+} FfxFsr3UpscalerSharedResourceDescriptions;
+
+/// A structure encapsulating the FidelityFX Super Resolution 3 context.
+///
+/// This sets up an object which contains all persistent internal data and
+/// resources that are required by FSR3.
+///
+/// The FfxFsr3UpscalerContext object should have a lifetime matching
+/// your use of FSR3. Before destroying the FSR3 context, care should be taken
+/// to ensure the GPU is not accessing the resources created or used by FSR3.
+/// It is therefore recommended that the GPU is idle before destroying the
+/// FSR3 context.
+///
+/// @ingroup ffxFsr3Upscaler
+typedef struct FfxFsr3UpscalerContext
+{
+ uint32_t data[FFX_FSR3UPSCALER_CONTEXT_SIZE]; ///< An opaque array of uint32_t which contains the data for the context.
+} FfxFsr3UpscalerContext;
+
+
+/// Create a FidelityFX Super Resolution 3 context from the parameters
+/// programmed to the FfxFsr3UpscalerContextDescription structure.
+///
+/// The context structure is the main object used to interact with the FSR3
+/// API, and is responsible for the management of the internal resources used
+/// by the FSR3 algorithm. When this API is called, multiple calls will be
+/// made via the pointers contained in the backendInterface structure.
+/// These callbacks will attempt to retrieve the device capabilities, and
+/// create the internal resources and pipelines required by FSR3's
+/// frame-to-frame function. Depending on the precise configuration used when
+/// creating the FfxFsr3UpscalerContext a different set of resources and
+/// pipelines might be requested via the callback functions.
+///
+/// The flags included in the flags field of
+/// FfxFsr3UpscalerContextDescription must match the configuration of your
+/// application as well as the intended use of FSR3. It is important that these
+/// flags are set correctly (as well as a correctly programmed
+/// FfxFsr3UpscalerDispatchDescription) to ensure correct operation. It is
+/// recommended to consult the overview documentation for further details on
+/// how FSR3 should be integrated into an application.
+///
+/// When the FfxFsr3UpscalerContext is created, you should use the
+/// ffxFsr3UpscalerContextDispatch function each frame where FSR3
+/// upscaling should be applied. See the documentation of
+/// ffxFsr3UpscalerContextDispatch for more details.
+///
+/// The FfxFsr3UpscalerContext should be destroyed when use of it is
+/// completed, typically when an application is unloaded or FSR3 upscaling is
+/// disabled by a user. To destroy the FSR3 context you should call
+/// ffxFsr3UpscalerContextDestroy.
+///
+/// @param [out] pContext A pointer to a FfxFsr3UpscalerContext structure to populate.
+/// @param [in] pContextDescription A pointer to a FfxFsr3UpscalerContextDescription structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either pContext or pContextDescription was NULL.
+/// @retval
+/// FFX_ERROR_INCOMPLETE_INTERFACE The operation failed because the FfxFsr3UpscalerContextDescription.backendInterface was not fully specified.
+/// @retval
+/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API FfxErrorCode ffxFsr3UpscalerContextCreate(FfxFsr3UpscalerContext* pContext, const FfxFsr3UpscalerContextDescription* pContextDescription);
+
+/// Provides the descriptions for shared resources (reconstructedPrevNearestDepth, dilatedDepth and dilatedMotionVectors) that must be allocated for this effect.
+///
+/// @param [in] context A pointer to a FfxFsr3UpscalerContext structure.
+/// @param [out] SharedResources A pointer to a FfxFsr3UpscalerSharedResourceDescriptions to populate.
+///
+/// @returns
+/// FFX_OK The operation completed successfully.
+/// @returns
+/// Anything else The operation failed.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API FfxErrorCode ffxFsr3UpscalerGetSharedResourceDescriptions(FfxFsr3UpscalerContext* context, FfxFsr3UpscalerSharedResourceDescriptions* SharedResources);
+
+/// Get the GPU memory usage of the FidelityFX Super Resolution context.
+///
+/// @param [in] pContext A pointer to a FfxFsr3UpscalerContext structure.
+/// @param [out] pVramUsage A pointer to a FfxEffectMemoryUsage structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either pContext or pVramUsage was NULL.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API FfxErrorCode ffxFsr3UpscalerContextGetGpuMemoryUsage(FfxFsr3UpscalerContext* pContext, FfxEffectMemoryUsage* pVramUsage);
+
+/// Dispatch the various passes that constitute FidelityFX Super Resolution 3.
+///
+/// FSR3 is a composite effect, meaning that it is comprised of multiple
+/// constituent passes (implemented as one or more clears, copies and compute
+/// dispatches). The ffxFsr3UpscalerContextDispatch function is the
+/// function which (via the use of the functions contained in the
+/// backendInterface field of the FfxFsr3UpscalerContextDescription
+/// structure) ultimately generates the sequence of graphics API calls required
+/// each frame.
+///
+/// As with the creation of the FfxFsr3UpscalerContext, correctly
+/// programming the FfxFsr3UpscalerDispatchDescription is key to ensuring
+/// the correct operation of FSR3. It is particularly important to ensure that
+/// camera jitter is correctly applied to your application's projection matrix
+/// (or camera origin for raytraced applications). FSR3 provides the
+/// ffxFsr3UpscalerGetJitterPhaseCount and
+/// ffxFsr3UpscalerGetJitterOffset entry points to help applications
+/// correctly compute the camera jitter. Whatever jitter pattern is used by the
+/// application it should be correctly programmed to the
+/// jitterOffset field of the pDispatchDescription
+/// structure. For more guidance on camera jitter please consult the
+/// documentation for ffxFsr3UpscalerGetJitterOffset as well as the
+/// accompanying overview documentation for FSR3.
+///
+/// @param [in] pContext A pointer to a FfxFsr3UpscalerContext structure.
+/// @param [in] pDispatchDescription A pointer to a FfxFsr3UpscalerDispatchDescription structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either pContext or pDispatchDescription was NULL.
+/// @retval
+/// FFX_ERROR_OUT_OF_RANGE The operation failed because pDispatchDescription.renderSize was larger than the maximum render resolution.
+/// @retval
+/// FFX_ERROR_NULL_DEVICE The operation failed because the device inside the context was NULL.
+/// @retval
+/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API FfxErrorCode ffxFsr3UpscalerContextDispatch(FfxFsr3UpscalerContext* pContext, const FfxFsr3UpscalerDispatchDescription* pDispatchDescription);
+
+/// A helper function to generate a reactive mask from an opaque-only texture and one containing translucent objects.
+///
+/// @param [in] pContext A pointer to a FfxFsr3UpscalerContext structure.
+/// @param [in] pParams A pointer to a FfxFsr3UpscalerGenerateReactiveDescription structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API FfxErrorCode ffxFsr3UpscalerContextGenerateReactiveMask(FfxFsr3UpscalerContext* pContext, const FfxFsr3UpscalerGenerateReactiveDescription* pParams);
+
+/// Destroy the FidelityFX Super Resolution context.
+///
+/// @param [out] pContext A pointer to a FfxFsr3UpscalerContext structure to destroy.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER The operation failed because pContext was NULL.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API FfxErrorCode ffxFsr3UpscalerContextDestroy(FfxFsr3UpscalerContext* pContext);
+
+/// Get the upscale ratio from the quality mode.
+///
+/// The following table enumerates the mapping of the quality modes to
+/// per-dimension scaling ratios.
+///
+/// Quality preset | Scale factor
+/// ----------------------------------------------------- | -------------
+/// FFX_FSR3UPSCALER_QUALITY_MODE_QUALITY | 1.5x
+/// FFX_FSR3UPSCALER_QUALITY_MODE_BALANCED | 1.7x
+/// FFX_FSR3UPSCALER_QUALITY_MODE_PERFORMANCE | 2.0x
+/// FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE | 3.0x
+///
+/// Passing an invalid qualityMode will return 0.0f.
+///
+/// @param [in] qualityMode The quality mode preset.
+///
+/// @returns
+/// The per-dimension upscaling ratio for
+/// qualityMode according to the table above.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API float ffxFsr3UpscalerGetUpscaleRatioFromQualityMode(FfxFsr3UpscalerQualityMode qualityMode);
+
+/// A helper function to calculate the rendering resolution from a target
+/// resolution and desired quality level.
+///
+/// This function applies the scaling factor returned by
+/// ffxFsr3UpscalerGetUpscaleRatioFromQualityMode to each dimension.
+///
+/// @param [out] pRenderWidth A pointer to a uint32_t which will hold the calculated render resolution width.
+/// @param [out] pRenderHeight A pointer to a uint32_t which will hold the calculated render resolution height.
+/// @param [in] displayWidth The target display resolution width.
+/// @param [in] displayHeight The target display resolution height.
+/// @param [in] qualityMode The desired quality mode for FSR3 upscaling.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_INVALID_POINTER Either pRenderWidth or pRenderHeight was NULL.
+/// @retval
+/// FFX_ERROR_INVALID_ENUM An invalid quality mode was specified.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API FfxErrorCode ffxFsr3UpscalerGetRenderResolutionFromQualityMode(
+ uint32_t* pRenderWidth,
+ uint32_t* pRenderHeight,
+ uint32_t displayWidth,
+ uint32_t displayHeight,
+ FfxFsr3UpscalerQualityMode qualityMode);
+
+/// A helper function to calculate the jitter phase count from the render and
+/// display resolution widths.
+///
+/// For more detailed information about the application of camera jitter to
+/// your application's rendering please refer to the
+/// ffxFsr3UpscalerGetJitterOffset function.
+///
+/// The table below shows the jitter phase count which this function
+/// would return for each of the quality presets.
+///
+/// Quality preset | Scale factor | Phase count
+/// ----------------------------------------------------- | ------------- | ---------------
+/// FFX_FSR3UPSCALER_QUALITY_MODE_QUALITY | 1.5x | 18
+/// FFX_FSR3UPSCALER_QUALITY_MODE_BALANCED | 1.7x | 23
+/// FFX_FSR3UPSCALER_QUALITY_MODE_PERFORMANCE | 2.0x | 32
+/// FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE | 3.0x | 72
+/// Custom | [1..n]x | ceil(8*n^2)
+///
+/// @param [in] renderWidth The render resolution width.
+/// @param [in] displayWidth The display resolution width.
+///
+/// @returns
+/// The jitter phase count for the scaling factor between renderWidth and displayWidth.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API int32_t ffxFsr3UpscalerGetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth);
+
+/// A helper function to calculate the subpixel jitter offset.
+///
+/// FSR3 relies on the application to apply sub-pixel jittering while rendering.
+/// This is typically included in the projection matrix of the camera. To make
+/// the application of camera jitter simple, the FSR3 API provides a small set
+/// of utility functions which compute the sub-pixel jitter offset for a
+/// particular frame within a sequence of separate jitter offsets. To begin, the
+/// index within the jitter phase must be computed. To calculate the
+/// sequence's length, you can call the ffxFsr3UpscalerGetJitterPhaseCount
+/// function. The index should be a value which is incremented each frame modulo
+/// the length of the sequence computed by ffxFsr3UpscalerGetJitterPhaseCount.
+/// The index within the jitter phase is passed to
+/// ffxFsr3UpscalerGetJitterOffset via the index parameter.
+///
+/// This function uses a Halton(2,3) sequence to compute the jitter offset.
+/// The ultimate index used for the sequence is index %
+/// phaseCount.
+///
+/// It is important to understand that the values returned from the
+/// ffxFsr3UpscalerGetJitterOffset function are in unit pixel space, and
+/// in order to composite this correctly into a projection matrix we must
+/// convert them into projection offsets. This is done as per the pseudo code
+/// listing which is shown below.
+///
+/// const int32_t jitterPhaseCount = ffxFsr3UpscalerGetJitterPhaseCount(renderWidth, displayWidth);
+///
+/// float jitterX = 0;
+/// float jitterY = 0;
+/// ffxFsr3UpscalerGetJitterOffset(&jitterX, &jitterY, index, jitterPhaseCount);
+///
+/// const float projectionJitterX = 2.0f * jitterX / (float)renderWidth;
+/// const float projectionJitterY = -2.0f * jitterY / (float)renderHeight;
+/// const Matrix4 jitterTranslationMatrix = translateMatrix(Matrix3::identity, Vector3(projectionJitterX, projectionJitterY, 0));
+/// const Matrix4 jitteredProjectionMatrix = jitterTranslationMatrix * projectionMatrix;
+///
+/// Jitter should be applied to all rendering. This includes opaque, alpha
+/// transparent, and raytraced objects. For rasterized objects, the sub-pixel
+/// jittering values calculated by the ffxFsr3UpscalerGetJitterOffset
+/// function can be applied to the camera projection matrix which is ultimately
+/// used to perform transformations during vertex shading. For raytraced
+/// rendering, the sub-pixel jitter should be applied to the ray's origin,
+/// often the camera's position.
+///
+/// Whether you elect to use the ffxFsr3UpscalerGetJitterOffset function
+/// or your own sequence generator, you must program the
+/// jitterOffset field of the
+/// FfxFsr3UpscalerDispatchDescription structure in order to inform FSR3
+/// of the jitter offset that has been applied in order to render each frame.
+///
+/// If not using the recommended ffxFsr3UpscalerGetJitterOffset function,
+/// care should be taken that your jitter sequence never generates a null vector;
+/// that is, a value of 0 in both the X and Y dimensions.
+///
+/// @param [out] pOutX A pointer to a float which will contain the subpixel jitter offset for the x dimension.
+/// @param [out] pOutY A pointer to a float which will contain the subpixel jitter offset for the y dimension.
+/// @param [in] index The index within the jitter sequence.
+/// @param [in] phaseCount The length of jitter phase. See ffxFsr3UpscalerGetJitterPhaseCount.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_INVALID_POINTER Either pOutX or pOutY was NULL.
+/// @retval
+/// FFX_ERROR_INVALID_ARGUMENT Argument phaseCount must be greater than 0.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API FfxErrorCode ffxFsr3UpscalerGetJitterOffset(float* pOutX, float* pOutY, int32_t index, int32_t phaseCount);
+
+/// A helper function to check if a resource is
+/// FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL.
+/// NOTE(review): the return values documented below read inverted relative to the function name ("IsNull" returning true for a non-null resource); confirm against the implementation.
+/// @param [in] resource A FfxResource.
+///
+/// @returns
+/// true The resource was not FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL.
+/// @returns
+/// false The resource was FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API bool ffxFsr3UpscalerResourceIsNull(FfxResource resource);
+
+/// Queries the version number of the FSR3 upscaler effect.
+///
+/// @returns
+/// The SDK version the effect was built with.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API FfxVersionNumber ffxFsr3UpscalerGetEffectVersion();
+
+/// Override an upscaler constant buffer value after upscaler context creation.
+///
+/// @param [in] context A pointer to a FfxFsr3UpscalerContext structure.
+/// @param [in] key A key from the FfxFsr3UpscalerConfigureKey enum.
+/// @param [in] valuePtr A pointer to the value to pass to the shader in the constant buffer. See Fsr3UpscalerConstants.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_INVALID_ENUM An invalid FfxFsr3UpscalerConfigureKey was specified.
+/// @retval
+/// FFX_ERROR_INVALID_POINTER context was NULL.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API FfxErrorCode ffxFsr3UpscalerSetConstant(FfxFsr3UpscalerContext* context, FfxFsr3UpscalerConfigureKey key, void* valuePtr);
+
+/// Set global debug message settings.
+///
+/// @param [in] fpMessage A ffxMessageCallback.
+/// @param [in] debugLevel An unsigned integer. Unimplemented.
+/// @retval
+/// FFX_OK The operation completed successfully.
+///
+/// @ingroup ffxFsr3Upscaler
+FFX_API FfxErrorCode ffxFsr3UpscalerSetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel);
+
+#if defined(__cplusplus)
+}
+#endif // #if defined(__cplusplus)
diff --git a/thirdparty/amd-ffx/ffx_fsr3upscaler_private.h b/thirdparty/amd-ffx/ffx_fsr3upscaler_private.h
new file mode 100644
index 000000000000..dc80dcd3ae60
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_fsr3upscaler_private.h
@@ -0,0 +1,127 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+#include "gpu/fsr3upscaler/ffx_fsr3upscaler_resources.h"
+
+/// An enumeration of all the permutations that can be passed to the FSR3 Upscaler algorithm.
+///
+/// FSR3 Upscaler features are organized through a set of pre-defined compile
+/// permutation options that need to be specified. Which shader blob
+/// is returned for pipeline creation will be determined by what combination
+/// of shader permutations is enabled.
+/// NOTE(review): the type name is spelled "Fs3..." (not "Fsr3..."); kept as-is because other code may reference it.
+/// @ingroup FSR3Upscaler
+typedef enum Fs3UpscalerShaderPermutationOptions
+{
+ FSR3UPSCALER_SHADER_PERMUTATION_USE_LANCZOS_TYPE = (1 << 0), ///< Off means reference, On means LUT.
+ FSR3UPSCALER_SHADER_PERMUTATION_HDR_COLOR_INPUT = (1 << 1), ///< Enables the HDR code path.
+ FSR3UPSCALER_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS = (1 << 2), ///< Indicates low resolution motion vectors are provided.
+ FSR3UPSCALER_SHADER_PERMUTATION_JITTER_MOTION_VECTORS = (1 << 3), ///< Indicates motion vectors were generated with jitter.
+ FSR3UPSCALER_SHADER_PERMUTATION_DEPTH_INVERTED = (1 << 4), ///< Indicates input resources were generated with inverted depth.
+ FSR3UPSCALER_SHADER_PERMUTATION_ENABLE_SHARPENING = (1 << 5), ///< Enables a supplementary sharpening pass.
+ FSR3UPSCALER_SHADER_PERMUTATION_FORCE_WAVE64 = (1 << 6), ///< Doesn't map to a define; selects a different table.
+ FSR3UPSCALER_SHADER_PERMUTATION_ALLOW_FP16 = (1 << 7), ///< Enables fast math computations where possible.
+} Fs3UpscalerShaderPermutationOptions;
+
+// Constants for FSR3 Upscaler dispatches. Must be kept in sync with cbFSR3Upscaler in ffx_fsr2_callbacks_hlsl.h (NOTE(review): the "fsr2" file name looks stale for an FSR3 header; confirm which shader header declares cbFSR3Upscaler).
+typedef struct Fsr3UpscalerConstants {
+
+ int32_t renderSize[2];
+ int32_t previousFrameRenderSize[2];
+
+ int32_t upscaleSize[2];
+ int32_t previousFrameUpscaleSize[2];
+
+ int32_t maxRenderSize[2];
+ int32_t maxUpscaleSize[2];
+
+ float deviceToViewDepth[4];
+
+ float jitterOffset[2];
+ float previousFrameJitterOffset[2];
+
+ float motionVectorScale[2];
+ float downscaleFactor[2];
+
+ float motionVectorJitterCancellation[2];
+ float tanHalfFOV;
+ float jitterPhaseCount;
+
+ float deltaTime;
+ float deltaPreExposure;
+ float viewSpaceToMetersFactor;
+ float frameIndex;
+
+ float velocityFactor; // Overridable via FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FVELOCITYFACTOR.
+ float reactivenessScale; // Overridable via FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FREACTIVENESSSCALE.
+ float shadingChangeScale; // Overridable via FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FSHADINGCHANGESCALE.
+ float accumulationAddedPerFrame; // Overridable via FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FACCUMULATIONADDEDPERFRAME.
+ float minDisocclusionAccumulation; // Overridable via FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FMINDISOCCLUSIONACCUMULATION.
+
+ // GODOT BEGINS
+ float pad[3]; // Rounds the 37 preceding 4-byte scalars (148 bytes) up to 160 bytes, so reprojectionMatrix starts on a 16-byte boundary (presumably required by the shader-side constant buffer layout; confirm).
+ float reprojectionMatrix[16]; // Godot-specific addition; same element count as FfxFsr3UpscalerDispatchDescription::reprojectionMatrix.
+ // GODOT ENDS
+} Fsr3UpscalerConstants;
+
+struct FfxFsr3UpscalerContextDescription; // Forward declarations of the SDK types used as members of FfxFsr3UpscalerContext_Private below.
+struct FfxDeviceCapabilities;
+struct FfxPipelineState;
+
+// FfxFsr3UpscalerContext_Private
+// The private implementation of the FSR3 Upscaler context (presumably what the opaque FfxFsr3UpscalerContext::data storage holds; TODO confirm against the implementation).
+typedef struct FfxFsr3UpscalerContext_Private {
+
+ FfxFsr3UpscalerContextDescription contextDescription;
+ FfxUInt32 effectContextId;
+ Fsr3UpscalerConstants constants; // CPU-side copy of the dispatch constants, see Fsr3UpscalerConstants.
+ FfxDevice device;
+ FfxDeviceCapabilities deviceCapabilities;
+ FfxPipelineState pipelinePrepareInputs; // One FfxPipelineState per pass follows.
+ FfxPipelineState pipelinePrepareReactivity;
+ FfxPipelineState pipelineShadingChange;
+ FfxPipelineState pipelineAccumulate;
+ FfxPipelineState pipelineAccumulateSharpen;
+ FfxPipelineState pipelineRCAS;
+ FfxPipelineState pipelineLumaPyramid;
+ FfxPipelineState pipelineGenerateReactive;
+ FfxPipelineState pipelineTcrAutogenerate;
+ FfxPipelineState pipelineShadingChangePyramid;
+ FfxPipelineState pipelineLumaInstability;
+ FfxPipelineState pipelineDebugView;
+ FfxConstantBuffer constantBuffers[FFX_FSR3UPSCALER_CONSTANTBUFFER_COUNT];
+
+ // Two arrays of resources, because e.g. FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_STATUS will use different resources when bound as SRV vs. when bound as UAV.
+ FfxResourceInternal srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT];
+ FfxResourceInternal uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT];
+
+ bool firstExecution;
+ uint32_t resourceFrameIndex;
+ float previousJitterOffset[2];
+ float preExposure;
+ float previousFramePreExposure;
+
+} FfxFsr3UpscalerContext_Private;
+
+// Declare fsr3UpscalerCreate so that it can be used from fsr3.
+FFX_API FfxErrorCode fsr3UpscalerCreate(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerContextDescription* contextDescription);
diff --git a/thirdparty/amd-ffx/ffx_interface.h b/thirdparty/amd-ffx/ffx_interface.h
new file mode 100644
index 000000000000..7fe3916296fd
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_interface.h
@@ -0,0 +1,676 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "ffx_assert.h"
+#include "ffx_types.h"
+#include "ffx_error.h"
+#include "ffx_message.h"
+
+// GODOT BEGINS
+// Fix GCC build: map the MSVC-only wcscpy_s/_countof names onto portable equivalents.
+#ifndef _MSC_VER
+#include <stddef.h> // size_t (NOTE(review): header name was lost in extraction; confirm against the tree)
+#include <wchar.h> // wcscpy
+#define wcscpy_s wcscpy
+#ifndef _countof
+#define _countof(a) (sizeof(a) / sizeof(*(a)))
+#endif
+#endif
+// GODOT ENDS
+
+#if defined(__cplusplus)
+#define FFX_CPU
+extern "C" {
+#endif // #if defined(__cplusplus)
+
+/// @defgroup Backends Backends
+/// Core interface declarations and natively supported backends
+///
+/// @ingroup ffxSDK
+
+/// @defgroup FfxInterface FfxInterface
+/// FidelityFX SDK function signatures and core defines requiring
+/// overrides for backend implementation.
+///
+/// @ingroup Backends
+FFX_FORWARD_DECLARE(FfxInterface);
+
+/// FidelityFX SDK major version.
+///
+/// @ingroup FfxInterface
+#define FFX_SDK_VERSION_MAJOR (1)
+
+/// FidelityFX SDK minor version.
+///
+/// @ingroup FfxInterface
+#define FFX_SDK_VERSION_MINOR (1)
+
+/// FidelityFX SDK patch version.
+///
+/// @ingroup FfxInterface
+#define FFX_SDK_VERSION_PATCH (4)
+
+/// Macro to pack a FidelityFX SDK version id together; each argument is parenthesized so expressions pack correctly.
+///
+/// @ingroup FfxInterface
+#define FFX_SDK_MAKE_VERSION( major, minor, patch ) ( ( ( major ) << 22 ) | ( ( minor ) << 12 ) | ( patch ) )
+
+/// Stand in type for FfxPass
+///
+/// These will be defined for each effect individually (i.e. FfxFsr2Pass).
+/// They are used to fetch the proper blob index to build effect shaders
+///
+/// @ingroup FfxInterface
+typedef uint32_t FfxPass;
+
+/// Get the SDK version of the backend context.
+///
+/// Newer effects may require support that legacy versions of the SDK will not be
+/// able to provide. A version query is thus required to ensure an effect component
+/// will always be paired with a backend which will support all needed functionality.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+///
+/// @returns
+/// The SDK version a backend was built with.
+///
+/// @ingroup FfxInterface
+typedef FfxVersionNumber(*FfxGetSDKVersionFunc)(
+ FfxInterface* backendInterface);
+
+/// Get effect VRAM usage.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] effectContextId The context space to be used for the effect in question.
+/// @param [out] outVramUsage The effect memory usage structure to fill out.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxGetEffectGpuMemoryUsageFunc)(FfxInterface* backendInterface, FfxUInt32 effectContextId, FfxEffectMemoryUsage* outVramUsage);
+
+/// Create and initialize the backend context.
+///
+/// The callback function sets up the backend context for rendering.
+/// It will create or reference the device and create required internal data structures.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] effect The effect the context is being created for
+/// @param [in] bindlessConfig A pointer to the bindless configuration, if required by the effect.
+/// @param [out] effectContextId The context space to be used for the effect in question.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxCreateBackendContextFunc)(
+ FfxInterface* backendInterface,
+ FfxEffect effect,
+ FfxEffectBindlessConfig* bindlessConfig,
+ FfxUInt32* effectContextId);
+
+/// Get a list of capabilities of the device.
+///
+/// When creating an FfxEffectContext it is desirable for the FFX
+/// core implementation to be aware of certain characteristics of the platform
+/// that is being targeted. This is because some optimizations which FFX SDK
+/// attempts to perform are more effective on certain classes of hardware than
+/// others, or are not supported by older hardware. In order to avoid cases
+/// where optimizations actually have the effect of decreasing performance, or
+/// reduce the breadth of support provided by FFX SDK, the FFX interface queries the
+/// capabilities of the device to make such decisions.
+///
+/// For target platforms with fixed hardware support you need not implement
+/// this callback function by querying the device, but instead may hardcode
+/// what features are available on the platform.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [out] outDeviceCapabilities The device capabilities structure to fill out.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode(*FfxGetDeviceCapabilitiesFunc)(
+ FfxInterface* backendInterface,
+ FfxDeviceCapabilities* outDeviceCapabilities);
+
+/// Destroy the backend context and dereference the device.
+///
+/// This function is called when the FfxEffectContext is destroyed.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] effectContextId The context space to be used for the effect in question.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode(*FfxDestroyBackendContextFunc)(
+ FfxInterface* backendInterface,
+ FfxUInt32 effectContextId);
+
+/// Create a resource.
+///
+/// This callback is intended for the backend to create internal resources.
+///
+/// Please note: It is also possible that the creation of resources might
+/// itself cause additional resources to be created by simply calling the
+/// FfxCreateResourceFunc function pointer again. This is
+/// useful when handling the initial creation of resources which must be
+/// initialized. The flow in such a case would be an initial call to create the
+/// CPU-side resource, another to create the GPU-side resource, and then a call
+/// to schedule a copy render job to move the data between the two. Typically
+/// this type of function call flow is only seen during the creation of an
+/// FfxEffectContext.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] createResourceDescription A pointer to a FfxCreateResourceDescription.
+/// @param [in] effectContextId The context space to be used for the effect in question.
+/// @param [out] outResource A pointer to a FfxResource object.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxCreateResourceFunc)(
+ FfxInterface* backendInterface,
+ const FfxCreateResourceDescription* createResourceDescription,
+ FfxUInt32 effectContextId,
+ FfxResourceInternal* outResource);
+
+/// Register a resource in the backend for the current frame.
+///
+/// Since the FfxInterface and the backends are not aware how many different
+/// resources will get passed in over time, it's not safe
+/// to register all resources simultaneously in the backend.
+/// Also passed resources may not be valid after the dispatch call.
+/// As a result it's safest to register them as FfxResourceInternal
+/// and clear them at the end of the dispatch call.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] inResource A pointer to a FfxResource.
+/// @param [in] effectContextId The context space to be used for the effect in question.
+/// @param [out] outResource A pointer to a FfxResourceInternal object.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode(*FfxRegisterResourceFunc)(
+ FfxInterface* backendInterface,
+ const FfxResource* inResource,
+ FfxUInt32 effectContextId,
+ FfxResourceInternal* outResource);
+
+
+/// Get an FfxResource from an FfxResourceInternal resource.
+///
+/// At times it is necessary to create an FfxResource representation
+/// of an internally created resource in order to register it with a
+/// child effect context. This function sets up the FfxResource needed
+/// to register.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] resource The FfxResourceInternal for which to setup an FfxResource.
+///
+/// @returns
+/// An FfxResource built from the internal resource
+///
+/// @ingroup FfxInterface
+typedef FfxResource(*FfxGetResourceFunc)(
+ FfxInterface* backendInterface,
+ FfxResourceInternal resource);
+
+/// Unregister all temporary FfxResourceInternal from the backend.
+///
+/// Unregister FfxResourceInternal referencing resources passed to
+/// a function as a parameter.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] commandList A pointer to a FfxCommandList structure.
+/// @param [in] effectContextId The context space to be used for the effect in question.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode(*FfxUnregisterResourcesFunc)(
+ FfxInterface* backendInterface,
+ FfxCommandList commandList,
+ FfxUInt32 effectContextId);
+
+/// Register a resource in the static bindless table of the backend.
+///
+/// A static resource will persist in their respective bindless table until it is
+/// overwritten by a different resource at the same index.
+/// The calling code must take care not to immediately register a new resource at an index
+/// that might be in use by an in-flight frame.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] desc A pointer to an FfxStaticResourceDescription.
+/// @param [in] effectContextId The context space to be used for the effect in question.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxRegisterStaticResourceFunc)(FfxInterface* backendInterface,
+ const FfxStaticResourceDescription* desc,
+ FfxUInt32 effectContextId);
+
+/// Retrieve a FfxResourceDescription matching a
+/// FfxResource structure.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] resource A pointer to a FfxResource object.
+///
+/// @returns
+/// A description of the resource.
+///
+/// @ingroup FfxInterface
+typedef FfxResourceDescription (*FfxGetResourceDescriptionFunc)(
+ FfxInterface* backendInterface,
+ FfxResourceInternal resource);
+
+/// Destroy a resource
+///
+/// This callback is intended for the backend to release an internal resource.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] resource A pointer to a FfxResource object.
+/// @param [in] effectContextId The context space to be used for the effect in question.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxDestroyResourceFunc)(
+ FfxInterface* backendInterface,
+ FfxResourceInternal resource,
+ FfxUInt32 effectContextId);
+
+/// Map resource memory
+///
+/// Maps the memory of the resource to a pointer and returns it.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] resource A pointer to a FfxResource object.
+/// @param [out] ptr A pointer to the mapped memory.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxMapResourceFunc)(FfxInterface* backendInterface, FfxResourceInternal resource, void** ptr);
+
+/// Unmap resource memory
+///
+/// Unmaps previously mapped memory of a resource.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] resource A pointer to a FfxResource object.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxUnmapResourceFunc)(FfxInterface* backendInterface, FfxResourceInternal resource);
+
+/// Copy constant buffer data into staging memory.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] data A pointer to the constant buffer data to stage.
+/// @param [in] size The size of the data (presumably in bytes; confirm against the backend).
+/// @param [out] constantBuffer A pointer to a FfxConstantBuffer (presumably filled out by the backend; confirm).
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxStageConstantBufferDataFunc)(
+ FfxInterface* backendInterface,
+ void* data,
+ FfxUInt32 size,
+ FfxConstantBuffer* constantBuffer);
+
+/// Create a render pipeline.
+///
+/// A rendering pipeline contains the shader as well as resource bindpoints
+/// and samplers.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] pass The identifier for the pass.
+/// @param [in] pipelineDescription A pointer to a FfxPipelineDescription describing the pipeline to be created.
+/// @param [in] effectContextId The context space to be used for the effect in question.
+/// @param [out] outPipeline A pointer to a FfxPipelineState structure which should be populated.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxCreatePipelineFunc)(
+ FfxInterface* backendInterface,
+ FfxEffect effect,
+ FfxPass pass,
+ uint32_t permutationOptions,
+ const FfxPipelineDescription* pipelineDescription,
+ FfxUInt32 effectContextId,
+ FfxPipelineState* outPipeline);
+
+typedef FfxErrorCode(*FfxGetPermutationBlobByIndexFunc)(FfxEffect effectId,
+ FfxPass passId,
+ FfxBindStage bindStage,
+ uint32_t permutationOptions,
+ FfxShaderBlob* outBlob);
+
+/// Destroy a render pipeline.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [out] pipeline A pointer to a FfxPipelineState structure
+/// which should be released.
+/// @param [in] effectContextId The context space to be used for the effect in question.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxDestroyPipelineFunc)(
+ FfxInterface* backendInterface,
+ FfxPipelineState* pipeline,
+ FfxUInt32 effectContextId);
+
+/// Schedule a render job to be executed on the next call of
+/// FfxExecuteGpuJobsFunc.
+///
+/// Render jobs can perform one of three different tasks: clear, copy or
+/// compute dispatches.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] job A pointer to a FfxGpuJobDescription structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxScheduleGpuJobFunc)(
+ FfxInterface* backendInterface,
+ const FfxGpuJobDescription* job);
+
+/// Execute scheduled render jobs on the commandList provided.
+///
+/// The recording of the graphics API commands should take place in this
+/// callback function, the render jobs which were previously enqueued (via
+/// callbacks made to FfxScheduleGpuJobFunc) should be
+/// processed in the order they were received. Advanced users might choose to
+/// reorder the rendering jobs, but should do so with care to respect the
+/// resource dependencies.
+///
+/// Depending on the precise contents of FfxDispatchDescription a
+/// different number of render jobs might have previously been enqueued (for
+/// example if sharpening is toggled on and off).
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] commandList A pointer to a FfxCommandList structure.
+/// @param [in] effectContextId The context space to be used for the effect in question.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxExecuteGpuJobsFunc)(
+ FfxInterface* backendInterface,
+ FfxCommandList commandList,
+ FfxUInt32 effectContextId);
+
+typedef enum FfxUiCompositionFlags
+{
+ FFX_UI_COMPOSITION_FLAG_USE_PREMUL_ALPHA = (1 << 0), ///< A bit indicating that we use premultiplied alpha for UI composition
+ FFX_UI_COMPOSITION_FLAG_ENABLE_INTERNAL_UI_DOUBLE_BUFFERING = (1 << 1), ///< A bit indicating that the swapchain should doublebuffer the UI resource
+} FfxUiCompositionFlags;
+
+typedef FfxErrorCode(*FfxPresentCallbackFunc)(const FfxPresentCallbackDescription* params, void*);
+typedef FfxErrorCode(*FfxFrameGenerationDispatchFunc)(const FfxFrameGenerationDispatchDescription* params, void*);
+typedef FfxErrorCode(*FfxWaitCallbackFunc)(wchar_t* fenceName, uint64_t fenceValueToWaitFor);
+
+/// A structure representing the configuration options to pass to FrameInterpolationSwapChain
+///
+/// @ingroup FfxInterface
+typedef struct FfxFrameGenerationConfig
+{
+ FfxSwapchain swapChain; ///< The FfxSwapchain to use with frame interpolation
+ FfxPresentCallbackFunc presentCallback; ///< A UI composition callback to call when finalizing the frame image
+ void* presentCallbackContext; ///< A pointer to be passed to the UI composition callback
+ FfxFrameGenerationDispatchFunc frameGenerationCallback; ///< The frame generation callback to use to generate the interpolated frame
+ void* frameGenerationCallbackContext; ///< A pointer to be passed to the frame generation callback
+ bool frameGenerationEnabled; ///< Sets the state of frame generation. Set to false to disable frame generation
+ bool allowAsyncWorkloads; ///< Sets the state of async workloads. Set to true to enable interpolation work on async compute
+ bool allowAsyncPresent; ///< Sets the state of async presentation (console only). Set to true to enable present from async command queue
+ FfxResource HUDLessColor; ///< The hudless back buffer image to use for UI extraction from backbuffer resource
+ FfxUInt32 flags; ///< Flags
+ bool onlyPresentInterpolated; ///< Set to true to only present interpolated frame
+ FfxRect2D interpolationRect; ///< Set the area in the backbuffer that will be interpolated
+ uint64_t frameID; ///< A frame identifier used to synchronize resource usage in workloads
+ bool drawDebugPacingLines; ///< Sets the state of pacing debug lines. Set to true to display debug lines
+} FfxFrameGenerationConfig;
+
+typedef FfxErrorCode (*FfxSwapChainConfigureFrameGenerationFunc)(FfxFrameGenerationConfig const* config);
+
+/// Allocate AMD FidelityFX Breadcrumbs Library markers buffer.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] blockBytes Size in bytes of the buffer to be allocated.
+/// @param [out] blockData Output information about allocated AMD FidelityFX Breadcrumbs Library buffer. Filled only on success of operation.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// Anything else The operation failed.
+///
+/// @ingroup FfxInterface
+typedef FfxErrorCode (*FfxBreadcrumbsAllocBlockFunc)(
+ FfxInterface* backendInterface,
+ uint64_t blockBytes,
+ FfxBreadcrumbsBlockData* blockData
+ );
+
+/// Deallocate AMD FidelityFX Breadcrumbs Library markers buffer.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [out] blockData Information about buffer to be freed. All resource handles are cleared after this operation.
+///
+/// @ingroup FfxInterface
+typedef void (*FfxBreadcrumbsFreeBlockFunc)(
+ FfxInterface* backendInterface,
+ FfxBreadcrumbsBlockData* blockData
+ );
+
+/// Write marker to AMD FidelityFX Breadcrumbs Library buffer on the commandList provided.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] commandList GPU command list to record marker writing command.
+/// @param [in] value Marker value to be written.
+/// @param [in] gpuLocation GPU destination address where marker will be written.
+/// @param [in] gpuBuffer Destination AMD FidelityFX Breadcrumbs Library buffer.
+/// @param [in] isBegin true for writing opening marker and false for ending marker.
+///
+/// @ingroup FfxInterface
+typedef void (*FfxBreadcrumbsWriteFunc)(
+ FfxInterface* backendInterface,
+ FfxCommandList commandList,
+ uint32_t value,
+ uint64_t gpuLocation,
+ void* gpuBuffer,
+ bool isBegin
+ );
+
+/// Printing GPU specific info to the AMD FidelityFX Breadcrumbs Library status buffer.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] allocs A pointer to the allocation callbacks.
+/// @param [in] extendedInfo true if should print more verbose device info and false for standard output.
+/// @param [out] printBuffer String buffer for writing GPU info.
+/// @param [out] printSize Size of string buffer for writing GPU info.
+///
+/// @ingroup FfxInterface
+typedef void (*FfxBreadcrumbsPrintDeviceInfoFunc)(
+ FfxInterface* backendInterface,
+ FfxAllocationCallbacks* allocs,
+ bool extendedInfo,
+ char** printBuffer,
+ size_t* printSize
+ );
+
+/// Register a Thread Safe constant buffer allocator to be used by the backend.
+///
+/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] constantAllocator An FfxConstantBufferAllocator callback to be used by the backend.
+///
+/// @ingroup FfxInterface
+typedef void(*FfxRegisterConstantBufferAllocatorFunc)(FfxInterface* backendInterface,
+ FfxConstantBufferAllocator constantAllocator);
+
+/// A structure encapsulating the interface between the core implementation of
+/// the FfxInterface and any graphics API that it should ultimately call.
+///
+/// This set of functions serves as an abstraction layer between FfxInterface and the
+/// API used to implement it. While the FidelityFX SDK ships with backends for DirectX12 and
+/// Vulkan, it is possible to implement your own backend for other platforms
+/// which sit on top of your engine's own abstraction layer. For details on the
+/// expectations of what each function should do you should refer to the
+/// description of the following function pointer types:
+/// - FfxCreateBackendContextFunc
+/// - FfxGetDeviceCapabilitiesFunc
+/// - FfxDestroyBackendContextFunc
+/// - FfxCreateResourceFunc
+/// - FfxRegisterResourceFunc
+/// - FfxGetResourceFunc
+/// - FfxUnregisterResourcesFunc
+/// - FfxGetResourceDescriptionFunc
+/// - FfxDestroyResourceFunc
+/// - FfxCreatePipelineFunc
+/// - FfxDestroyPipelineFunc
+/// - FfxScheduleGpuJobFunc
+/// - FfxExecuteGpuJobsFunc
+/// - FfxBeginMarkerFunc
+/// - FfxEndMarkerFunc
+/// - FfxRegisterConstantBufferAllocatorFunc
+///
+/// Depending on the graphics API that is abstracted by the backend, it may be
+/// required that the backend is to some extent stateful. To ensure that
+/// applications retain full control to manage the memory used by the FidelityFX SDK, the
+/// scratchBuffer and scratchBufferSize fields are
+/// provided. A backend should provide a means of specifying how much scratch
+/// memory is required for its internal implementation (e.g: via a function
+/// or constant value). The application is then responsible for allocating that
+/// memory and providing it when setting up the SDK backend. Backends provided
+/// with the FidelityFX SDK do not perform dynamic memory allocations, and instead
+/// sub-allocate all memory from the scratch buffers provided.
+///
+/// The scratchBuffer and scratchBufferSize fields
+/// should be populated according to the requirements of each backend. For
+/// example, if using the DirectX 12 backend you should call the
+/// ffxGetScratchMemorySizeDX12 function. It is not required
+/// that custom backend implementations use a scratch buffer.
+///
+/// Any functional addition to this interface mandates a version
+/// bump to ensure full functionality across effects and backends.
+///
+/// @ingroup FfxInterface
+typedef struct FfxInterface {
+
+ // FidelityFX SDK 1.0 callback handles
+ FfxGetSDKVersionFunc fpGetSDKVersion; ///< A callback function to query the SDK version.
+ FfxGetEffectGpuMemoryUsageFunc fpGetEffectGpuMemoryUsage; ///< A callback function to query effect GPU memory usage.
+ FfxCreateBackendContextFunc fpCreateBackendContext; ///< A callback function to create and initialize the backend context.
+ FfxGetDeviceCapabilitiesFunc fpGetDeviceCapabilities; ///< A callback function to query device capabilities.
+ FfxDestroyBackendContextFunc fpDestroyBackendContext; ///< A callback function to destroy the backend context. This also dereferences the device.
+ FfxCreateResourceFunc fpCreateResource; ///< A callback function to create a resource.
+ FfxRegisterResourceFunc fpRegisterResource; ///< A callback function to register an external resource.
+ FfxGetResourceFunc fpGetResource; ///< A callback function to convert an internal resource to external resource type
+ FfxUnregisterResourcesFunc fpUnregisterResources; ///< A callback function to unregister external resource.
+ FfxRegisterStaticResourceFunc fpRegisterStaticResource; ///< A callback function to register a static resource.
+ FfxGetResourceDescriptionFunc fpGetResourceDescription; ///< A callback function to retrieve a resource description.
+ FfxDestroyResourceFunc fpDestroyResource; ///< A callback function to destroy a resource.
+ FfxMapResourceFunc fpMapResource; ///< A callback function to map a resource.
+ FfxUnmapResourceFunc fpUnmapResource; ///< A callback function to unmap a resource.
+ FfxStageConstantBufferDataFunc fpStageConstantBufferDataFunc; ///< A callback function to copy constant buffer data into staging memory.
+ FfxCreatePipelineFunc fpCreatePipeline; ///< A callback function to create a render or compute pipeline.
+ FfxDestroyPipelineFunc fpDestroyPipeline; ///< A callback function to destroy a render or compute pipeline.
+ FfxScheduleGpuJobFunc fpScheduleGpuJob; ///< A callback function to schedule a render job.
+ FfxExecuteGpuJobsFunc fpExecuteGpuJobs; ///< A callback function to execute all queued render jobs.
+
+ // FidelityFX SDK 1.1 callback handles
+ FfxBreadcrumbsAllocBlockFunc fpBreadcrumbsAllocBlock; ///< A callback function to allocate block of memory for AMD FidelityFX Breadcrumbs Library buffer.
+ FfxBreadcrumbsFreeBlockFunc fpBreadcrumbsFreeBlock; ///< A callback function to free AMD FidelityFX Breadcrumbs Library buffer.
+ FfxBreadcrumbsWriteFunc fpBreadcrumbsWrite; ///< A callback function to write marker into AMD FidelityFX Breadcrumbs Library.
+ FfxBreadcrumbsPrintDeviceInfoFunc fpBreadcrumbsPrintDeviceInfo; ///< A callback function to print active GPU info for AMD FidelityFX Breadcrumbs Library log.
+
+ FfxGetPermutationBlobByIndexFunc fpGetPermutationBlobByIndex; ///< A callback function taking an effect id, pass id, bind stage and permutation options, with an FfxShaderBlob out-parameter.
+ FfxSwapChainConfigureFrameGenerationFunc fpSwapChainConfigureFrameGeneration; ///< A callback function to configure swap chain present callback.
+
+ FfxRegisterConstantBufferAllocatorFunc fpRegisterConstantBufferAllocator; ///< A callback function to register a custom Thread Safe constant buffer allocator.
+
+ void* scratchBuffer; ///< A preallocated buffer for memory utilized internally by the backend.
+ size_t scratchBufferSize; ///< Size of the buffer pointed to by scratchBuffer.
+ FfxDevice device; ///< A backend specific device
+
+} FfxInterface;
+
+#if defined(__cplusplus)
+}
+#endif // #if defined(__cplusplus)
diff --git a/thirdparty/amd-ffx/ffx_message.cpp b/thirdparty/amd-ffx/ffx_message.cpp
new file mode 100644
index 000000000000..66755ba19bde
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_message.cpp
@@ -0,0 +1,69 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2025 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_message.h"
+
+// GODOT BEGINS
+// On non-Windows Platforms this file uses the macro `FFX_UNUSED`, we have to include it here
+#include "ffx_util.h"
+// GODOT ENDS
+
+#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h> // required for OutputDebugString()
+#endif // #ifdef _WIN32
+
+static ffxMessageCallback s_messageCallback;
+static uint32_t s_debugLevel;
+
+// Store the print callback and the debug level in this file's statics.
+void ffxSetPrintMessageCallback(ffxMessageCallback callback, uint32_t debugLevel)
+{
+    // ffxPrintMessage() reads s_messageCallback; nothing in this file reads s_debugLevel.
+    s_debugLevel = debugLevel;
+    s_messageCallback = callback;
+}
+
+// Print a message: forward it to the registered callback, else (Windows only) to the debugger output.
+void ffxPrintMessage(uint32_t type, const wchar_t* message)
+{
+#ifdef _WIN32
+    if (s_messageCallback) {
+        s_messageCallback(type, message);
+        return;
+    }
+    // Pick the prefix up front so that types other than error/warning are still
+    // formatted; previously the buffer reached OutputDebugStringW uninitialized.
+    const wchar_t* prefix = (type == FFX_MESSAGE_TYPE_ERROR) ? L"FSR_API_DEBUG_ERROR"
+        : (type == FFX_MESSAGE_TYPE_WARNING)                ? L"FSR_API_DEBUG_WARNING"
+                                                            : L"FSR_API_DEBUG";
+    wchar_t buffer[512] = {};
+    swprintf_s(buffer, 512, L"%ls: %ls\n", prefix, message);
+    OutputDebugStringW(buffer);
+#else
+    // Non-Windows builds drop the message without consulting the callback.
+    FFX_UNUSED(type);
+    FFX_UNUSED(message);
+#endif
+}
diff --git a/thirdparty/amd-ffx/ffx_message.h b/thirdparty/amd-ffx/ffx_message.h
new file mode 100644
index 000000000000..442849f4d3b8
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_message.h
@@ -0,0 +1,63 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "ffx_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // #ifdef __cplusplus
+
+/// @defgroup Messages Messages
+/// Messages used by FidelityFX SDK functions
+///
+/// @ingroup ffxHost
+
+/// Provides the ability to set a callback for print messages.
+///
+/// @param [in] callback The callback function that will receive print messages.
+/// @param [in] debugLevel Debug level stored together with the callback.
+/// @ingroup Messages
+FFX_API void ffxSetPrintMessageCallback(ffxMessageCallback callback, uint32_t debugLevel);
+
+/// Function to print a message.
+///
+/// @param [in] type See FfxMsgType
+/// @param [in] message The message to print, as a null-terminated wide-character string.
+///
+/// @ingroup Messages
+FFX_API void ffxPrintMessage(uint32_t type, const wchar_t* message);
+
+/// Macro to print a message through ffxPrintMessage(),
+/// which calls the application-registered callback when one is set
+/// and otherwise writes to the debugger's TTY.
+///
+/// @ingroup Messages
+#define FFX_PRINT_MESSAGE( type, msg) \
+ do \
+ { \
+ ffxPrintMessage( type, msg); \
+ } while (0)
+#ifdef __cplusplus
+}
+#endif // #ifdef __cplusplus
diff --git a/thirdparty/amd-ffx/ffx_object_management.cpp b/thirdparty/amd-ffx/ffx_object_management.cpp
new file mode 100644
index 000000000000..016cd0ed1b94
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_object_management.cpp
@@ -0,0 +1,48 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_interface.h"
+#include "ffx_object_management.h"
+
+/// Destroys a pipeline by calling the backend's fpDestroyPipeline entry point.
+///
+/// @param backendInterface Backend whose fpDestroyPipeline callback is invoked; it is
+///                         dereferenced without a null check.
+/// @param pipeline         Pipeline state handed to the backend; asserted to be non-null.
+/// @param effectContextId  Effect context identifier, passed through unchanged.
+void ffxSafeReleasePipeline(FfxInterface* backendInterface, FfxPipelineState* pipeline, FfxUInt32 effectContextId)
+{
+    FFX_ASSERT(pipeline);
+    FFX_ASSERT(backendInterface->fpDestroyPipeline);
+
+    const auto destroyPipeline = backendInterface->fpDestroyPipeline;
+    destroyPipeline(backendInterface, pipeline, effectContextId);
+}
+
+/// Destroys the resource whose internal index is one past that of `resource`.
+///
+/// @param backendInterface Backend whose fpDestroyResource callback is invoked.
+/// @param resource         Handle whose internalIndex + 1 identifies the resource to destroy.
+///                         NOTE(review): presumably the copy resource is always registered
+///                         directly after its source; confirm against the backend.
+/// @param effectContextId  Effect context identifier, passed through unchanged.
+void ffxSafeReleaseCopyResource(FfxInterface* backendInterface, FfxResourceInternal resource, FfxUInt32 effectContextId)
+{
+    FFX_ASSERT(backendInterface->fpDestroyResource);
+
+    // Only internalIndex is assigned on the temporary handle.
+    FfxResourceInternal adjacentResource;
+    adjacentResource.internalIndex = resource.internalIndex + 1;
+
+    const auto destroyResource = backendInterface->fpDestroyResource;
+    destroyResource(backendInterface, adjacentResource, effectContextId);
+}
+
+/// Destroys a resource by calling the backend's fpDestroyResource entry point.
+///
+/// @param backendInterface Backend whose fpDestroyResource callback is invoked.
+/// @param resource         Internal resource handle, passed to the backend by value.
+/// @param effectContextId  Effect context identifier, passed through unchanged.
+void ffxSafeReleaseResource(FfxInterface* backendInterface, FfxResourceInternal resource, FfxUInt32 effectContextId)
+{
+    FFX_ASSERT(backendInterface->fpDestroyResource);
+
+    const auto destroyResource = backendInterface->fpDestroyResource;
+    destroyResource(backendInterface, resource, effectContextId);
+}
diff --git a/thirdparty/amd-ffx/ffx_object_management.h b/thirdparty/amd-ffx/ffx_object_management.h
new file mode 100644
index 000000000000..b665ffa847d9
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_object_management.h
@@ -0,0 +1,38 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "ffx_types.h"
+#include "ffx_interface.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif // #if defined(__cplusplus)
+
+FFX_API void ffxSafeReleasePipeline(FfxInterface* backendInterface, FfxPipelineState* pipeline, FfxUInt32 effectContextId); // Destroys `pipeline` through backendInterface->fpDestroyPipeline.
+FFX_API void ffxSafeReleaseCopyResource(FfxInterface* backendInterface, FfxResourceInternal resource, FfxUInt32 effectContextId); // Destroys the resource at resource.internalIndex + 1 through backendInterface->fpDestroyResource.
+FFX_API void ffxSafeReleaseResource(FfxInterface* backendInterface, FfxResourceInternal resource, FfxUInt32 effectContextId); // Destroys `resource` through backendInterface->fpDestroyResource.
+
+#if defined(__cplusplus)
+}
+#endif // #if defined(__cplusplus)
diff --git a/thirdparty/amd-ffx/ffx_opticalflow.cpp b/thirdparty/amd-ffx/ffx_opticalflow.cpp
new file mode 100644
index 000000000000..3f0ed0f02ee1
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_opticalflow.cpp
@@ -0,0 +1,987 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <algorithm> // for max used inside SPD CPU code.
+#include <cmath> // for fabs, abs, sinf, sqrt, etc.
+#include <string.h> // for memset
+#include <cfloat> // for FLT_EPSILON
+#include "ffx_opticalflow.h"
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-function"
+#pragma clang diagnostic ignored "-Wsign-compare"
+#endif
+
+#define FFX_CPU
+#include "gpu/ffx_core.h"
+#include "gpu/spd/ffx_spd.h"
+#include "gpu/opticalflow/ffx_opticalflow_callbacks_hlsl.h"
+#include "ffx_object_management.h"
+
+#define FFX_OPTICALFLOW_MAX_QUEUED_FRAMES 16
+
+#include "ffx_opticalflow_private.h"
+
+/// One row of a name-to-identifier lookup table for shader bindings.
+struct Binding
+{
+    uint32_t index;    ///< Identifier written into a binding's resourceIdentifier on a name match.
+    wchar_t  name[64]; ///< Binding name compared with wcscmp(); at most 63 characters plus terminator.
+};
+
+static const Binding srvBindingNames[] = // SRV texture names that patchResourceBindings() matches against a pipeline's srvTextureBindings.
+{
+ {FFX_OF_BINDING_IDENTIFIER_INPUT_COLOR, L"r_input_color"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT, L"r_optical_flow_input"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS_INPUT, L"r_optical_flow_previous_input"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW, L"r_optical_flow"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS, L"r_optical_flow_previous"},
+};
+
+static const Binding uavBindingNames[] = // UAV texture names that patchResourceBindings() matches against a pipeline's uavTextureBindings.
+{
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT, L"rw_optical_flow_input"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_1, L"rw_optical_flow_input_level_1"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_2, L"rw_optical_flow_input_level_2"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_3, L"rw_optical_flow_input_level_3"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_4, L"rw_optical_flow_input_level_4"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_5, L"rw_optical_flow_input_level_5"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_6, L"rw_optical_flow_input_level_6"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW, L"rw_optical_flow"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_NEXT_LEVEL, L"rw_optical_flow_next_level"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM, L"rw_optical_flow_scd_histogram"}, // scene change detection histogram
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM, L"rw_optical_flow_scd_previous_histogram"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP, L"rw_optical_flow_scd_temp"},
+ {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT, L"rw_optical_flow_scd_output"},
+};
+
+static const Binding cbBindingNames[] = // Constant buffer names that patchResourceBindings() matches against a pipeline's constantBufferBindings.
+{
+ {FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER, L"cbOF"},
+ {FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER_SPD, L"cbOF_SPD"}
+};
+
+/// Broad structure of the root signature.
+/// The enumerators take the implicit values 0 through 4, so the final entry
+/// equals the number of layout slots declared before it.
+enum OpticalFlowRootSignatureLayout
+{
+    OPTICALFLOW_ROOT_SIGNATURE_LAYOUT_UAVS,
+    OPTICALFLOW_ROOT_SIGNATURE_LAYOUT_SRVS,
+    OPTICALFLOW_ROOT_SIGNATURE_LAYOUT_CONSTANTS,
+    OPTICALFLOW_ROOT_SIGNATURE_LAYOUT_CONSTANTS_REGISTER_1,
+    OPTICALFLOW_ROOT_SIGNATURE_LAYOUT_PARAMETER_COUNT ///< Count of the entries above.
+};
+
+/// Constant block for the SPD-based pass: eight 32-bit values in total.
+/// createPipelineStates() sizes the second root constant range from
+/// sizeof(OpticalFlowSpdConstants) / sizeof(uint32_t).
+/// NOTE(review): the field meanings below are read from the names only; confirm
+/// against the shader-side declaration of cbOF_SPD.
+struct OpticalFlowSpdConstants
+{
+    uint32_t mips;               ///< Presumably the number of mip levels to generate.
+    uint32_t numworkGroups;      ///< Presumably the total work group count.
+    uint32_t workGroupOffset[2]; ///< Presumably the x/y work group offset.
+
+    uint32_t numworkGroupsOpticalFlowInputPyramid;
+    uint32_t pad0_;              ///< Padding; brings the struct to eight uint32_t values.
+    uint32_t pad1_;
+    uint32_t pad2_;
+};
+
+/// Looks up `name` in a binding table.
+/// Returns the matching entry, or nullptr when no entry carries that name.
+/// The table length is deduced from the array type, so no element-count macro is needed.
+template <size_t N>
+static const Binding* findBinding(const Binding (&table)[N], const wchar_t* name)
+{
+    for (const Binding& entry : table)
+    {
+        if (0 == wcscmp(entry.name, name))
+            return &entry;
+    }
+    return nullptr;
+}
+
+/// Resolves every binding of a freshly created pipeline from its name to the
+/// resource identifier used by this effect.
+///
+/// SRV textures are matched against srvBindingNames, UAV textures against
+/// uavBindingNames and constant buffers against cbBindingNames; on a match the
+/// binding's resourceIdentifier is overwritten with the table's index value.
+///
+/// @param inoutPipeline Pipeline whose srvTextureBindings, uavTextureBindings and
+///                      constantBufferBindings are patched in place.
+/// @return FFX_OK when every binding name was found, otherwise
+///         FFX_ERROR_INVALID_ARGUMENT at the first unknown name (bindings patched
+///         before that point keep their new identifiers). An unknown name also
+///         trips FFX_ASSERT.
+static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
+{
+    for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvTextureCount; ++srvIndex)
+    {
+        const Binding* match = findBinding(srvBindingNames, inoutPipeline->srvTextureBindings[srvIndex].name);
+        FFX_ASSERT(match != nullptr);
+        if (match == nullptr)
+            return FFX_ERROR_INVALID_ARGUMENT;
+
+        inoutPipeline->srvTextureBindings[srvIndex].resourceIdentifier = match->index;
+    }
+
+    for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavTextureCount; ++uavIndex)
+    {
+        const Binding* match = findBinding(uavBindingNames, inoutPipeline->uavTextureBindings[uavIndex].name);
+        FFX_ASSERT(match != nullptr);
+        if (match == nullptr)
+            return FFX_ERROR_INVALID_ARGUMENT;
+
+        inoutPipeline->uavTextureBindings[uavIndex].resourceIdentifier = match->index;
+    }
+
+    for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex)
+    {
+        const Binding* match = findBinding(cbBindingNames, inoutPipeline->constantBufferBindings[cbIndex].name);
+        FFX_ASSERT(match != nullptr);
+        if (match == nullptr)
+            return FFX_ERROR_INVALID_ARGUMENT;
+
+        inoutPipeline->constantBufferBindings[cbIndex].resourceIdentifier = match->index;
+    }
+
+    return FFX_OK;
+}
+
+/// Builds the shader permutation bit mask for an optical flow pipeline.
+///
+/// The first, second and last parameters are unnamed and ignored; only the two
+/// named booleans influence the result.
+///
+/// @param fp16    Adds OPTICALFLOW_SHADER_PERMUTATION_ALLOW_FP16 when true.
+/// @param force64 Adds OPTICALFLOW_SHADER_PERMUTATION_FORCE_WAVE64 when true.
+/// @return The OR of the selected permutation bits, or 0 when both are false.
+static uint32_t getPipelinePermutationFlags(uint32_t, FfxPass, bool fp16, bool force64, bool)
+{
+    uint32_t permutation = 0;
+    if (force64)
+        permutation |= OPTICALFLOW_SHADER_PERMUTATION_FORCE_WAVE64;
+    if (fp16)
+        permutation |= OPTICALFLOW_SHADER_PERMUTATION_ALLOW_FP16;
+    return permutation;
+}
+
+/// Creates (or re-creates) every compute pipeline used by the optical flow effect.
+///
+/// Each pipeline is first released through ffxSafeReleasePipeline(), then created
+/// via the backend's fpCreatePipeline, and finally has its bindings resolved by
+/// patchResourceBindings(). The input pyramid pipeline is created with two root
+/// constant ranges; all later pipelines are created with one.
+///
+/// @param context Private effect context; its contextDescription, effectContextId
+///                and pipeline members are read and written.
+/// @return FFX_OK on success, otherwise the first error reported by the device
+///         capability query, by pipeline creation or by binding resolution.
+///         Pipelines created before the failure are left in place.
+static FfxErrorCode createPipelineStates(FfxOpticalflowContext_Private* context)
+{
+    FFX_ASSERT(context);
+
+    constexpr size_t samplerCount = 2;
+    FfxSamplerDescription samplerDescs[samplerCount] = {
+        {FFX_FILTER_TYPE_MINMAGMIP_POINT, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE},
+        {FFX_FILTER_TYPE_MINMAGMIP_LINEAR, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE} };
+
+    constexpr size_t rootConstantCount = 2;
+    FfxRootConstantDescription rootConstantDescs[rootConstantCount] = {
+        {sizeof(OpticalflowConstants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE},
+        {sizeof(OpticalFlowSpdConstants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE} };
+
+    FfxPipelineDescription pipelineDescription = {};
+    pipelineDescription.stage = FFX_BIND_COMPUTE_SHADER_STAGE;
+    pipelineDescription.contextFlags = context->contextDescription.flags;
+    pipelineDescription.samplerCount = samplerCount;
+    pipelineDescription.samplers = samplerDescs;
+    pipelineDescription.rootConstantBufferCount = rootConstantCount;
+    pipelineDescription.rootConstants = rootConstantDescs;
+
+    // Zero-initialised and validated so the permutation flags below are never
+    // derived from indeterminate values when the query fails.
+    FfxDeviceCapabilities capabilities = {};
+    FFX_VALIDATE(context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &capabilities));
+
+    const bool haveShaderModel66 = capabilities.maximumSupportedShaderModel >= FFX_SHADER_MODEL_6_6;
+    const bool supportedFP16 = capabilities.fp16Supported;
+
+    // Both options apply only when the device reports a 32..64 wave lane range.
+    const bool mixedWaveSize = (capabilities.waveLaneCountMin == 32 && capabilities.waveLaneCountMax == 64);
+    const bool useLut = mixedWaveSize;
+    const bool canForceWave64 = mixedWaveSize && haveShaderModel66;
+
+    const uint32_t contextFlags = context->contextDescription.flags;
+
+    auto CreateComputePipeline = [&](FfxPass pass, const wchar_t* name, FfxPipelineState* pipeline) -> FfxErrorCode {
+        ffxSafeReleasePipeline(&context->contextDescription.backendInterface, pipeline, context->effectContextId);
+        wcscpy_s(pipelineDescription.name, name);
+        FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(
+            &context->contextDescription.backendInterface,
+            FFX_EFFECT_OPTICALFLOW,
+            pass,
+            getPipelinePermutationFlags(contextFlags, pass, supportedFP16, canForceWave64, useLut),
+            &pipelineDescription,
+            context->effectContextId,
+            pipeline));
+
+        // Surface unknown binding names to the caller instead of dropping the result.
+        return patchResourceBindings(pipeline);
+    };
+
+    // The input pyramid pass is the only one created with both root constant ranges.
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_GENERATE_OPTICAL_FLOW_INPUT_PYRAMID, L"Opticalflow_InputPyramid", &context->pipelineGenerateOpticalFlowInputPyramid));
+
+    pipelineDescription.rootConstantBufferCount = 1;
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_PREPARE_LUMA, L"Opticalflow_Luma", &context->pipelinePrepareLuma));
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_GENERATE_SCD_HISTOGRAM, L"Opticalflow_SCD_Histogram", &context->pipelineGenerateSCDHistogram));
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_COMPUTE_SCD_DIVERGENCE, L"Opticalflow_SCD_Divergence", &context->pipelineComputeSCDDivergence));
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_COMPUTE_OPTICAL_FLOW_ADVANCED_V5, L"Opticalflow_Search", &context->pipelineComputeOpticalFlowAdvancedV5));
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_FILTER_OPTICAL_FLOW_V5, L"Opticalflow_Filter", &context->pipelineFilterOpticalFlowV5));
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_SCALE_OPTICAL_FLOW_ADVANCED_V5, L"Opticalflow_Upscale", &context->pipelineScaleOpticalFlowAdvancedV5));
+
+    return FFX_OK;
+}
+
+constexpr uint32_t OpticalFlowMaxPyramidLevels = 7; // Used by GetOpticalFlowHistogramSize() to scale the search radius per level.
+constexpr uint32_t HistogramBins = 256; // Factor of the SCD histogram texture width (see GetSCDHistogramTextureWidth()).
+constexpr uint32_t HistogramsPerDim = 3; // GetSCDHistogramTextureWidth() multiplies HistogramBins by HistogramsPerDim squared.
+constexpr uint32_t HistogramShifts = 3; // Not referenced in the part of the file visible here — presumably used further down; confirm.
+
+/// Computes how many blocks of `opticalFlowBlockSize` pixels are needed to cover
+/// `displaySize` in each dimension, rounding up.
+///
+/// @param displaySize          Width and height in pixels.
+/// @param opticalFlowBlockSize Edge length of one block; must be non-zero because it
+///                             is used as a divisor.
+/// @return Block counts as { width, height }.
+static FfxDimensions2D GetOpticalFlowTextureSize(const FfxDimensions2D& displaySize, const uint32_t opticalFlowBlockSize)
+{
+    // Adding (blockSize - 1) before dividing turns truncation into a ceiling.
+    const uint32_t roundUpBias = opticalFlowBlockSize - 1;
+    const uint32_t blocksAcross = (displaySize.width + roundUpBias) / opticalFlowBlockSize;
+    const uint32_t blocksDown = (displaySize.height + roundUpBias) / opticalFlowBlockSize;
+    return { blocksAcross, blocksDown };
+}
+
+/// Returns the square histogram extent for a pyramid level.
+///
+/// The maximum velocity is 8 * 2^(OpticalFlowMaxPyramidLevels - 1 - level) and each
+/// dimension holds 2 * maxVelocity + 1 bins, so level 0 yields 1025 x 1025.
+///
+/// @param level Pyramid level; values above OpticalFlowMaxPyramidLevels - 1 are not
+///              guarded against and would produce an out-of-range shift.
+static FfxDimensions2D GetOpticalFlowHistogramSize(int level)
+{
+    const uint32_t searchRadius = 8;
+    const uint32_t levelsBelowTop = OpticalFlowMaxPyramidLevels - 1 - level;
+    const uint32_t maxVelocity = searchRadius * (1 << levelsBelowTop);
+    const uint32_t binsPerDimension = 2 * maxVelocity + 1;
+    return { binsPerDimension, binsPerDimension };
+}
+
+/// Computes the thread group counts needed to cover the histogram of a pyramid
+/// level with 16 x 16 thread groups.
+///
+/// Per axis, extent / 16 additional elements are added to the histogram extent
+/// before the ceiling division by the group size.
+///
+/// @param level Pyramid level forwarded to GetOpticalFlowHistogramSize().
+/// @return Group counts as { x, y }.
+static FfxDimensions2D GetGlobalMotionSearchDispatchSize(int level)
+{
+    const uint32_t threadGroupSizeX = 16;
+    const uint32_t threadGroupSizeY = 16;
+    const FfxDimensions2D histogramSize = GetOpticalFlowHistogramSize(level);
+
+    // Same arithmetic for both axes: pad the extent, then divide rounding up.
+    auto groupsForAxis = [](uint32_t extent, uint32_t groupSize) -> uint32_t {
+        const uint32_t additionalElementsDueToShifts = extent / groupSize;
+        return (extent + additionalElementsDueToShifts + groupSize - 1) / groupSize;
+    };
+
+    const uint32_t dispatchX = groupsForAxis(histogramSize.width, threadGroupSizeX);
+    const uint32_t dispatchY = groupsForAxis(histogramSize.height, threadGroupSizeY);
+    return { dispatchX, dispatchY };
+}
+
+/// Width of the scene change detection histogram texture:
+/// HistogramBins entries for each of the HistogramsPerDim x HistogramsPerDim histograms.
+static uint32_t GetSCDHistogramTextureWidth()
+{
+    const uint32_t histogramCount = HistogramsPerDim * HistogramsPerDim;
+    return histogramCount * HistogramBins;
+}
+
+static FfxErrorCode opticalflowCreate(FfxOpticalflowContext_Private* context, const FfxOpticalflowContextDescription* contextDescription) // Initialises *context from *contextDescription: backend context, internal textures, then pipelines. Returns FFX_OK or the first error hit.
+{
+ FFX_ASSERT(context);
+ FFX_ASSERT(contextDescription);
+ FfxErrorCode errorCode = FFX_OK;
+
+ memset(context, 0, sizeof(FfxOpticalflowContext_Private)); // start from a fully zeroed private context
+ context->device = contextDescription->backendInterface.device;
+
+ memcpy(&context->contextDescription, contextDescription, sizeof(FfxOpticalflowContextDescription)); // keep a private copy; everything below goes through it
+
+ // Check version info - make sure we are linked with the right backend version
+ FfxVersionNumber version = context->contextDescription.backendInterface.fpGetSDKVersion(&context->contextDescription.backendInterface);
+ FFX_RETURN_ON_ERROR(version == FFX_SDK_MAKE_VERSION(1, 1, 4), FFX_ERROR_INVALID_VERSION); // only backend version 1.1.4 is accepted
+
+ errorCode = context->contextDescription.backendInterface.fpCreateBackendContext(&context->contextDescription.backendInterface, FFX_EFFECT_OPTICALFLOW, nullptr, &context->effectContextId); // NOTE(review): the later error returns in this function do not visibly destroy this backend context
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+ errorCode = context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &context->deviceCapabilities);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+ context->firstExecution = true;
+ context->resourceFrameIndex = 0;
+
+ context->constants.inputLumaResolution[0] = context->contextDescription.resolution.width;
+ context->constants.inputLumaResolution[1] = context->contextDescription.resolution.height;
+
+ FfxDimensions2D opticalFlowInputTextureSize = context->contextDescription.resolution; // level 0 of the input pyramid matches the requested resolution
+
+ const FfxResourceType texture1dResourceType = (context->contextDescription.flags & FFX_OPTICALFLOW_ENABLE_TEXTURE1D_USAGE) ? FFX_RESOURCE_TYPE_TEXTURE1D : FFX_RESOURCE_TYPE_TEXTURE2D; // type applied below to surfaces whose height is 1
+
+ uint32_t minBlockSize = 8;
+ const FfxDimensions2D opticalFlowTextureSize = GetOpticalFlowTextureSize(contextDescription->resolution, minBlockSize); // resolution divided by 8, rounded up
+
+ const FfxDimensions2D opticalFlowLevel1TextureSize = { FFX_ALIGN_UP(opticalFlowTextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowTextureSize.height, 2) / 2 }; // each level is derived from the previous one via FFX_ALIGN_UP(x, 2) / 2
+ const FfxDimensions2D opticalFlowLevel2TextureSize = { FFX_ALIGN_UP(opticalFlowLevel1TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel1TextureSize.height, 2) / 2 };
+ const FfxDimensions2D opticalFlowLevel3TextureSize = { FFX_ALIGN_UP(opticalFlowLevel2TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel2TextureSize.height, 2) / 2 };
+ const FfxDimensions2D opticalFlowLevel4TextureSize = { FFX_ALIGN_UP(opticalFlowLevel3TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel3TextureSize.height, 2) / 2 };
+ const FfxDimensions2D opticalFlowLevel5TextureSize = { FFX_ALIGN_UP(opticalFlowLevel4TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel4TextureSize.height, 2) / 2 };
+ const FfxDimensions2D opticalFlowLevel6TextureSize = { FFX_ALIGN_UP(opticalFlowLevel5TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel5TextureSize.height, 2) / 2 };
+ const FfxDimensions2D opticalFlowLevel7TextureSize = { FFX_ALIGN_UP(opticalFlowLevel6TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel6TextureSize.height, 2) / 2 }; // not referenced again in this function
+
+ const FfxDimensions2D opticalFlowHistogramTextureSize = GetOpticalFlowHistogramSize(0); // not referenced again in this function
+
+ const FfxDimensions2D globalMotionSearchMaxDispatchSize = GetGlobalMotionSearchDispatchSize(0);
+ const uint32_t globalMotionSearchTextureWidth = 4 + (globalMotionSearchMaxDispatchSize.width * globalMotionSearchMaxDispatchSize.height); // not referenced again in this function
+
+ const FfxInternalResourceDescription internalSurfaceDesc[] = { // one positional entry per internal texture; the loop below reads id, name, format, width, height, mipCount, usage and initData from each
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1, L"OPTICALFLOW_OpticalFlowInput1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width, opticalFlowInputTextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_1, L"OPTICALFLOW_OpticalFlowInput1Level1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 1, opticalFlowInputTextureSize.height >> 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_2, L"OPTICALFLOW_OpticalFlowInput1Level2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 2, opticalFlowInputTextureSize.height >> 2, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_3, L"OPTICALFLOW_OpticalFlowInput1Level3", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 3, opticalFlowInputTextureSize.height >> 3, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_4, L"OPTICALFLOW_OpticalFlowInput1Level4", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 4, opticalFlowInputTextureSize.height >> 4, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_5, L"OPTICALFLOW_OpticalFlowInput1Level5", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 5, opticalFlowInputTextureSize.height >> 5, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_6, L"OPTICALFLOW_OpticalFlowInput1Level6", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 6, opticalFlowInputTextureSize.height >> 6, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2, L"OPTICALFLOW_OpticalFlowInput2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width, opticalFlowInputTextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_1, L"OPTICALFLOW_OpticalFlowInput2Level1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 1, opticalFlowInputTextureSize.height >> 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_2, L"OPTICALFLOW_OpticalFlowInput2Level2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 2, opticalFlowInputTextureSize.height >> 2, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_3, L"OPTICALFLOW_OpticalFlowInput2Level3", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 3, opticalFlowInputTextureSize.height >> 3, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_4, L"OPTICALFLOW_OpticalFlowInput2Level4", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 4, opticalFlowInputTextureSize.height >> 4, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_5, L"OPTICALFLOW_OpticalFlowInput2Level5", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 5, opticalFlowInputTextureSize.height >> 5, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_6, L"OPTICALFLOW_OpticalFlowInput2Level6", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 6, opticalFlowInputTextureSize.height >> 6, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1, L"OPTICALFLOW_OpticalFlow1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowTextureSize.width, opticalFlowTextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_1, L"OPTICALFLOW_OpticalFlow1Level1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel1TextureSize.width, opticalFlowLevel1TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_2, L"OPTICALFLOW_OpticalFlow1Level2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel2TextureSize.width, opticalFlowLevel2TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_3, L"OPTICALFLOW_OpticalFlow1Level3", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel3TextureSize.width, opticalFlowLevel3TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_4, L"OPTICALFLOW_OpticalFlow1Level4", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel4TextureSize.width, opticalFlowLevel4TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_5, L"OPTICALFLOW_OpticalFlow1Level5", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel5TextureSize.width, opticalFlowLevel5TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_6, L"OPTICALFLOW_OpticalFlow1Level6", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel6TextureSize.width, opticalFlowLevel6TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2, L"OPTICALFLOW_OpticalFlow2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowTextureSize.width, opticalFlowTextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_1, L"OPTICALFLOW_OpticalFlow2Level1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel1TextureSize.width, opticalFlowLevel1TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_2, L"OPTICALFLOW_OpticalFlow2Level2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel2TextureSize.width, opticalFlowLevel2TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_3, L"OPTICALFLOW_OpticalFlow2Level3", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel3TextureSize.width, opticalFlowLevel3TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_4, L"OPTICALFLOW_OpticalFlow2Level4", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel4TextureSize.width, opticalFlowLevel4TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_5, L"OPTICALFLOW_OpticalFlow2Level5", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel5TextureSize.width, opticalFlowLevel5TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_6, L"OPTICALFLOW_OpticalFlow2Level6", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel6TextureSize.width, opticalFlowLevel6TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM, L"OPTICALFLOW_OpticalFlowSCDHistogram", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32_UINT, GetSCDHistogramTextureWidth(), 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM, L"OPTICALFLOW_OpticalFlowSCDPreviousHistogram", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32_FLOAT, GetSCDHistogramTextureWidth(), 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, // NOTE(review): R32_FLOAT here while the current histogram above is R32_UINT — confirm this is intended
+
+ { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP, L"OPTICALFLOW_OpticalFlowSCDTemp", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV,
+ FFX_SURFACE_FORMAT_R32_UINT, 3, 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} },
+ };
+
+ memset(context->resources, 0, sizeof(context->resources));
+
+ for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) { // create every internal surface listed above
+
+ const FfxInternalResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex];
+ const FfxResourceType resourceType = currentSurfaceDescription->height > 1 ? FFX_RESOURCE_TYPE_TEXTURE2D : texture1dResourceType; // the table's own type column is not read; height alone picks 2D vs. the 1D-capable type
+ const FfxResourceDescription resourceDescription = {
+ resourceType, currentSurfaceDescription->format,
+ currentSurfaceDescription->width, currentSurfaceDescription->height, 1,
+ currentSurfaceDescription->mipCount, FFX_RESOURCE_FLAGS_NONE, currentSurfaceDescription->usage }; // the table's flags column is not forwarded; FFX_RESOURCE_FLAGS_NONE is always passed
+ const FfxResourceStates initialState = FFX_RESOURCE_STATE_UNORDERED_ACCESS;
+ const FfxCreateResourceDescription createResourceDescription = {
+ FFX_HEAP_TYPE_DEFAULT, resourceDescription, initialState, currentSurfaceDescription->name, currentSurfaceDescription->id, currentSurfaceDescription->initData };
+
+ FFX_VALIDATE(context->contextDescription.backendInterface.fpCreateResource(
+ &context->contextDescription.backendInterface,
+ &createResourceDescription,
+ context->effectContextId,
+ &context->resources[currentSurfaceDescription->id])); // resulting handle is stored at the slot named by the entry's id
+ }
+
+ memset(context->srvBindings, 0, sizeof(context->srvBindings));
+ memset(context->uavBindings, 0, sizeof(context->uavBindings));
+
+ {
+ context->refreshPipelineStates = false; // pipelines are built right here, so no refresh is pending
+ errorCode = createPipelineStates(context);
+ FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+ }
+
+ return FFX_OK;
+}
+
+/// Tears down everything owned by an optical-flow context.
+///
+/// Releases the seven compute pipelines, then every slot of `context->resources`,
+/// and finally destroys the backend context for this effect.
+///
+/// @param context Private optical-flow context (asserted non-null).
+/// @return Always FFX_OK.
+static FfxErrorCode opticalflowRelease(FfxOpticalflowContext_Private* context)
+{
+    FFX_ASSERT(context);
+
+    auto* const backend = &context->contextDescription.backendInterface;
+
+    // Pipelines are released in the same order the original code listed them.
+    FfxPipelineState* const ownedPipelines[] = {
+        &context->pipelinePrepareLuma,
+        &context->pipelineGenerateOpticalFlowInputPyramid,
+        &context->pipelineGenerateSCDHistogram,
+        &context->pipelineComputeSCDDivergence,
+        &context->pipelineComputeOpticalFlowAdvancedV5,
+        &context->pipelineFilterOpticalFlowV5,
+        &context->pipelineScaleOpticalFlowAdvancedV5,
+    };
+    for (FfxPipelineState* pipelineState : ownedPipelines) {
+        ffxSafeReleasePipeline(backend, pipelineState, context->effectContextId);
+    }
+
+    // Every resource slot is visited; slots that were never created are left to
+    // ffxSafeReleaseResource to deal with.
+    for (int32_t slot = 0; slot < FFX_OF_RESOURCE_IDENTIFIER_COUNT; ++slot) {
+        ffxSafeReleaseResource(backend, context->resources[slot], context->effectContextId);
+    }
+
+    backend->fpDestroyBackendContext(backend, context->effectContextId);
+
+    return FFX_OK;
+}
+
+/// Builds a compute job for `pipeline` from the context's current bindings and
+/// queues it on the backend.
+///
+/// For each SRV/UAV slot the pipeline declares, the resource currently stored in
+/// `context->srvBindings` / `context->uavBindings` under the slot's resource
+/// identifier is copied into the job. Constant buffers are taken from
+/// `context->constantBuffers` the same way. The job is only scheduled here;
+/// nothing in this function executes it.
+///
+/// @param context      Private optical-flow context supplying bindings and the backend.
+/// @param pipeline     Pipeline state to dispatch (copied into the job descriptor).
+/// @param pipelineName Label copied into the job's `jobLabel`.
+/// @param dispatchX    Value stored in the job's dimensions[0].
+/// @param dispatchY    Value stored in the job's dimensions[1].
+/// @param dispatchZ    Value stored in the job's dimensions[2] (defaults to 1).
+static void scheduleDispatch(FfxOpticalflowContext_Private* context, const FfxPipelineState* pipeline, const wchar_t* pipelineName, uint32_t dispatchX, uint32_t dispatchY, uint32_t dispatchZ = 1)
+{
+    FfxComputeJobDescription jobDescriptor = {};
+
+    for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) {
+
+        const uint32_t bindingIdentifier = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier;
+
+        // Validate the identifier before it is used as an array index, so a bad
+        // binding trips the assert instead of reading outside srvBindings first.
+        FFX_ASSERT(bindingIdentifier != FFX_OF_BINDING_IDENTIFIER_NULL);
+        FFX_ASSERT(bindingIdentifier < FFX_OF_BINDING_IDENTIFIER_COUNT);
+
+        const FfxResourceInternal currentResource = context->srvBindings[bindingIdentifier];
+        jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource;
+#ifdef FFX_DEBUG
+        wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name);
+#endif
+    }
+
+    for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavTextureCount; ++currentUnorderedAccessViewIndex) {
+
+        const uint32_t bindingIdentifier = pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].resourceIdentifier;
+
+        // Same ordering as above: check first, index second.
+        FFX_ASSERT(bindingIdentifier != FFX_OF_BINDING_IDENTIFIER_NULL);
+        FFX_ASSERT(bindingIdentifier < FFX_OF_BINDING_IDENTIFIER_COUNT);
+
+        const FfxResourceInternal currentResource = context->uavBindings[bindingIdentifier];
+        jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource;
+        jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = 0;
+#ifdef FFX_DEBUG
+        wcscpy_s(jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].name, pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].name);
+#endif
+    }
+
+    jobDescriptor.dimensions[0] = dispatchX;
+    jobDescriptor.dimensions[1] = dispatchY;
+    jobDescriptor.dimensions[2] = dispatchZ;
+    jobDescriptor.pipeline = *pipeline;
+
+    for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) {
+#ifdef FFX_DEBUG
+        wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name);
+#endif
+        jobDescriptor.cbs[currentRootConstantIndex] = context->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier];
+    }
+
+    FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE };
+    wcscpy_s(dispatchJob.jobLabel, pipelineName);
+    dispatchJob.computeJobDescriptor = jobDescriptor;
+
+    context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &dispatchJob);
+}
+
+/// Records one frame of optical-flow GPU work and submits it through the backend.
+///
+/// In order, this function:
+///  1. registers the caller's output (flow vector, SCD) and input (color) resources,
+///  2. recreates the pipelines if `context->refreshPipelineStates` is set,
+///  3. updates the constants and, on `params->reset` or first execution, schedules
+///     zero-clears of the SCD and input-pyramid surfaces,
+///  4. schedules PrepareLuma, the input-pyramid generation, the SCD histogram and
+///     the SCD divergence dispatches,
+///  5. walks the pyramid from the coarsest level down to level 0, scheduling a
+///     Search and a Filter pass per level plus a Scale pass for every level > 0,
+///  6. schedules COMPUTE_READ -> UNORDERED_ACCESS transitions for all input surfaces,
+///  7. advances `resourceFrameIndex`, executes the queued jobs on
+///     `params->commandList` and unregisters the per-frame resources.
+///
+/// @param context Private optical-flow context; bindings, constants and frame
+///                counters stored in it are modified.
+/// @param params  Per-dispatch description (command list, color input, output
+///                targets, reset flag, transfer function, luminance range).
+/// @return FFX_OK on success, or the error code from createPipelineStates.
+///         NOTE(review): FFX_VALIDATE presumably also returns early when
+///         fpExecuteGpuJobs fails -- confirm against the macro definition.
+static FfxErrorCode dispatch(FfxOpticalflowContext_Private* context, const FfxOpticalflowDispatchDescription* params)
+{
+    auto& backend = context->contextDescription.backendInterface;
+
+    // The two alternating input sets (base surface plus six pyramid levels each),
+    // with the job labels used when they are cleared and when they are
+    // transitioned back to UAV state. Order matches the original job order.
+    struct InputSurface {
+        uint32_t       id;
+        const wchar_t* clearLabel;
+        const wchar_t* transitionLabel;
+    };
+    static const InputSurface inputSurfaces[] = {
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1,         L"Clear Optical Flow Input 1",         L"Transition Optical Flow Input 1" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_1, L"Clear Optical Flow Input 1 Level 1", L"Transition Optical Flow Input 1 Level 1" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_2, L"Clear Optical Flow Input 1 Level 2", L"Transition Optical Flow Input 1 Level 2" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_3, L"Clear Optical Flow Input 1 Level 3", L"Transition Optical Flow Input 1 Level 3" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_4, L"Clear Optical Flow Input 1 Level 4", L"Transition Optical Flow Input 1 Level 4" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_5, L"Clear Optical Flow Input 1 Level 5", L"Transition Optical Flow Input 1 Level 5" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_6, L"Clear Optical Flow Input 1 Level 6", L"Transition Optical Flow Input 1 Level 6" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2,         L"Clear Optical Flow Input 2",         L"Transition Optical Flow Input 2" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_1, L"Clear Optical Flow Input 2 Level 1", L"Transition Optical Flow Input 2 Level 1" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_2, L"Clear Optical Flow Input 2 Level 2", L"Transition Optical Flow Input 2 Level 2" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_3, L"Clear Optical Flow Input 2 Level 3", L"Transition Optical Flow Input 2 Level 3" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_4, L"Clear Optical Flow Input 2 Level 4", L"Transition Optical Flow Input 2 Level 4" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_5, L"Clear Optical Flow Input 2 Level 5", L"Transition Optical Flow Input 2 Level 5" },
+        { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_6, L"Clear Optical Flow Input 2 Level 6", L"Transition Optical Flow Input 2 Level 6" },
+    };
+
+    // Register the caller-owned resources for this frame: the two outputs as UAVs
+    // and the color input as an SRV.
+    backend.fpRegisterResource(
+        &backend,
+        &params->opticalFlowVector,
+        context->effectContextId,
+        &context->uavBindings[FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_VECTOR]);
+    backend.fpRegisterResource(
+        &backend,
+        &params->opticalFlowSCD,
+        context->effectContextId,
+        &context->uavBindings[FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_SCD_OUTPUT]);
+    backend.fpRegisterResource(
+        &backend,
+        &params->color,
+        context->effectContextId,
+        &context->srvBindings[FFX_OF_BINDING_IDENTIFIER_INPUT_COLOR]);
+
+    FfxCommandList commandList = params->commandList;
+    const int advancedAlgorithmIterations = 7;
+    const uint32_t opticalFlowBlockSize = 8;
+
+    if (context->refreshPipelineStates) {
+
+        context->refreshPipelineStates = false;
+
+        const FfxErrorCode errorCode = createPipelineStates(context);
+        FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+    }
+
+    const FfxResourceDescription resourceDescInputColor = backend.fpGetResourceDescription(
+        &backend,
+        context->srvBindings[FFX_OF_BINDING_IDENTIFIER_INPUT_COLOR]);
+    FFX_ASSERT(resourceDescInputColor.type == FFX_RESOURCE_TYPE_TEXTURE2D);
+
+    context->constants.backbufferTransferFunction = params->backbufferTransferFunction;
+    context->constants.minMaxLuminance[0] = params->minMaxLuminance.x;
+    context->constants.minMaxLuminance[1] = params->minMaxLuminance.y;
+
+    // A reset is forced on the very first dispatch of a context.
+    const bool resetAccumulation = params->reset || context->firstExecution;
+    context->firstExecution = false;
+
+    if (resetAccumulation) {
+        context->constants.frameIndex = 0;
+    }
+    else {
+        context->constants.frameIndex++;
+    }
+
+    if (resetAccumulation)
+    {
+        const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f };
+        FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT };
+        memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float));
+
+        // One job object is reused for every clear; only label and target change.
+        const auto scheduleClear = [&](const wchar_t* label, const FfxResourceInternal& target) {
+            wcscpy_s(clearJob.jobLabel, label);
+            clearJob.clearJobDescriptor.target = target;
+            backend.fpScheduleGpuJob(&backend, &clearJob);
+        };
+
+        scheduleClear(L"Clear Optical Flow SCD Temp", context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP]);
+        // The shared SCD output is cleared under the same label as SCD Temp,
+        // exactly as the original code did.
+        scheduleClear(L"Clear Optical Flow SCD Temp", context->uavBindings[FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_SCD_OUTPUT]);
+        scheduleClear(L"Clear Optical Flow SCD Histogram", context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM]);
+        scheduleClear(L"Clear Optical Flow SCD Previous histogram", context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM]);
+
+        for (const InputSurface& surface : inputSurfaces) {
+            scheduleClear(surface.clearLabel, context->resources[surface.id]);
+        }
+    }
+
+    uint32_t resolutionMultiplier = 1;
+
+    FfxUInt32x2 threadGroupSizeOpticalFlowInputPyramid;
+    FfxUInt32x2 workGroupOffset;
+    FfxUInt32x2 numWorkGroupsAndMips;
+    FfxUInt32x4 rectInfo = { 0, 0,
+        context->contextDescription.resolution.width * resolutionMultiplier,
+        context->contextDescription.resolution.height * resolutionMultiplier };
+    ffxSpdSetup(threadGroupSizeOpticalFlowInputPyramid, workGroupOffset, numWorkGroupsAndMips, rectInfo, 4);
+
+    OpticalFlowSpdConstants luminancePyramidConstants;
+    luminancePyramidConstants.numworkGroups = numWorkGroupsAndMips[0];
+    luminancePyramidConstants.mips = numWorkGroupsAndMips[1];
+    luminancePyramidConstants.workGroupOffset[0] = workGroupOffset[0];
+    luminancePyramidConstants.workGroupOffset[1] = workGroupOffset[1];
+    luminancePyramidConstants.numworkGroupsOpticalFlowInputPyramid = numWorkGroupsAndMips[0];
+
+    backend.fpStageConstantBufferDataFunc(&backend, &context->constants, sizeof(context->constants), &context->constantBuffers[FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER]);
+    backend.fpStageConstantBufferDataFunc(&backend, &luminancePyramidConstants, sizeof(luminancePyramidConstants), &context->constantBuffers[FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER_SPD]);
+
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM] = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM];
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM] = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM];
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP] = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP];
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT] = context->uavBindings[FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_SCD_OUTPUT];
+
+    // Odd frames write input set 2 and read set 1 as "previous"; even frames do
+    // the opposite. The same alternation is applied to the flow surfaces.
+    const bool isOddFrame = !!(context->resourceFrameIndex & 1);
+
+    const uint32_t opticalFlowInputResourceIndex = isOddFrame ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1;
+    const uint32_t opticalFlowPreviousInputResourceIndex = isOddFrame ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2;
+
+    const uint32_t opticalFlowResourceIndex = isOddFrame ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1;
+    const uint32_t opticalFlowPreviousResourceIndex = isOddFrame ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2;
+
+    // Pyramid level N of a set is addressed as (base identifier + N).
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT] = context->resources[opticalFlowInputResourceIndex];
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_1] = context->resources[opticalFlowInputResourceIndex + 1];
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_2] = context->resources[opticalFlowInputResourceIndex + 2];
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_3] = context->resources[opticalFlowInputResourceIndex + 3];
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_4] = context->resources[opticalFlowInputResourceIndex + 4];
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_5] = context->resources[opticalFlowInputResourceIndex + 5];
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_6] = context->resources[opticalFlowInputResourceIndex + 6];
+
+    context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT] = context->resources[opticalFlowInputResourceIndex];
+    context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS_INPUT] = context->resources[opticalFlowPreviousInputResourceIndex];
+
+    context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->resources[opticalFlowResourceIndex];
+    context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->resources[opticalFlowResourceIndex];
+    context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS] = context->resources[opticalFlowPreviousResourceIndex];
+
+    // PrepareLuma: the dispatch size is the resolution divided (rounding up) by
+    // 2 pixels per thread and then by 16 threads per group, on each axis.
+    {
+        const uint32_t threadGroupSizeX = 16;
+        const uint32_t threadGroupSizeY = 16;
+        const uint32_t threadPixelsX = 2;
+        const uint32_t threadPixelsY = 2;
+        const uint32_t dispatchX = ((context->contextDescription.resolution.width + (threadPixelsX - 1)) / threadPixelsX + (threadGroupSizeX - 1)) / threadGroupSizeX;
+        const uint32_t dispatchY = ((context->contextDescription.resolution.height + (threadPixelsY - 1)) / threadPixelsY + (threadGroupSizeY - 1)) / threadGroupSizeY;
+        scheduleDispatch(context, &context->pipelinePrepareLuma, L"OF PrepareLuma", dispatchX, dispatchY);
+    }
+
+    // Input pyramid: dispatch size comes from ffxSpdSetup above.
+    scheduleDispatch(context,
+        &context->pipelineGenerateOpticalFlowInputPyramid,
+        L"OF GenerateOpticalFlowInputPyramid",
+        threadGroupSizeOpticalFlowInputPyramid[0],
+        threadGroupSizeOpticalFlowInputPyramid[1]);
+
+    // SCD histogram generation.
+    {
+        const uint32_t threadGroupSizeX = 32;
+        const uint32_t strataWidth = (context->contextDescription.resolution.width / 4) / HistogramsPerDim;
+        const uint32_t dispatchX = (strataWidth + threadGroupSizeX - 1) / threadGroupSizeX;
+        const uint32_t dispatchY = 16;
+        const uint32_t dispatchZ = HistogramsPerDim * HistogramsPerDim;
+        scheduleDispatch(context, &context->pipelineGenerateSCDHistogram, L"OF GenerateSCDHistogram", dispatchX, dispatchY, dispatchZ);
+    }
+
+    // SCD divergence.
+    {
+        const uint32_t dispatchX = HistogramsPerDim * HistogramsPerDim;
+        const uint32_t dispatchY = HistogramShifts;
+        scheduleDispatch(context, &context->pipelineComputeSCDDivergence, L"OF ComputeSCDDivergence", dispatchX, dispatchY);
+    }
+
+    // Per-level flow texture sizes: level 0 from the block size, each further
+    // level half the previous one (rounded up).
+    FfxDimensions2D opticalFlowTextureSizes[OpticalFlowMaxPyramidLevels];
+    const int pyramidMaxIterations = advancedAlgorithmIterations;
+    FFX_ASSERT(pyramidMaxIterations <= OpticalFlowMaxPyramidLevels);
+
+    opticalFlowTextureSizes[0] = GetOpticalFlowTextureSize(context->contextDescription.resolution, opticalFlowBlockSize);
+    for (int i = 1; i < pyramidMaxIterations; i++)
+    {
+        opticalFlowTextureSizes[i] = {
+            (opticalFlowTextureSizes[i - 1].width + 1) / 2,
+            (opticalFlowTextureSizes[i - 1].height + 1) / 2
+        };
+    }
+
+    // Coarse-to-fine: start at the smallest level and finish at level 0.
+    for (int level = pyramidMaxIterations - 1; level >= 0; level--)
+    {
+        const bool isOddLevel = !!(level & 1);
+
+        // Flow surfaces alternate per level as well as per frame.
+        const uint32_t opticalFlowResourceIndexA = (isOddFrame != isOddLevel) ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1;
+        const uint32_t opticalFlowResourceIndexB = (isOddFrame != isOddLevel) ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2;
+
+        context->constants.opticalFlowPyramidLevel = level;
+        context->constants.opticalFlowPyramidLevelCount = pyramidMaxIterations;
+
+        // Constants are re-staged every level because the level index changed.
+        backend.fpStageConstantBufferDataFunc(&backend, &context->constants, sizeof(context->constants), &context->constantBuffers[FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER]);
+
+        context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT] = context->resources[opticalFlowInputResourceIndex + level];
+        context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS_INPUT] = context->resources[opticalFlowPreviousInputResourceIndex + level];
+        context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->resources[opticalFlowResourceIndexA + level];
+        context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS] = context->resources[opticalFlowResourceIndexB + level];
+
+        // Search pass: writes flow surface A of this level.
+        {
+            const FfxUInt32 inputLumaWidth = ffxMax(context->contextDescription.resolution.width >> level, 1);
+            const FfxUInt32 inputLumaHeight = ffxMax(context->contextDescription.resolution.height >> level, 1);
+            const std::wstring pipelineName = L"OF " + std::to_wstring(level) + L" Search";
+
+            const uint32_t threadPixels = 4;
+            FFX_ASSERT(opticalFlowBlockSize >= threadPixels);
+            const uint32_t threadGroupSizeY = 16;
+            const uint32_t threadGroupSize = 64;
+            const uint32_t dispatchX = ((inputLumaWidth + threadPixels - 1) / threadPixels * threadGroupSizeY + (threadGroupSize - 1)) / threadGroupSize;
+            const uint32_t dispatchY = (inputLumaHeight + (threadGroupSizeY - 1)) / threadGroupSizeY;
+            scheduleDispatch(context, &context->pipelineComputeOpticalFlowAdvancedV5, pipelineName.c_str(), dispatchX, dispatchY);
+        }
+
+        // Filter pass: reads surface A and writes surface B, except at level 0
+        // where the result goes straight to the caller's flow-vector target.
+        context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS] = context->resources[opticalFlowResourceIndexA + level];
+        context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->resources[opticalFlowResourceIndexB + level];
+        if (level == 0)
+        {
+            context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->uavBindings[FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_VECTOR];
+        }
+
+        {
+            const uint32_t levelWidth = opticalFlowTextureSizes[level].width;
+            const uint32_t levelHeight = opticalFlowTextureSizes[level].height;
+
+            const uint32_t threadGroupSizeX = 16;
+            const uint32_t threadGroupSizeY = 4;
+            const uint32_t dispatchX = (levelWidth + threadGroupSizeX - 1) / threadGroupSizeX;
+            const uint32_t dispatchY = (levelHeight + threadGroupSizeY - 1) / threadGroupSizeY;
+            const std::wstring pipelineName = L"OF " + std::to_wstring(level) + L" Filter";
+
+            scheduleDispatch(context, &context->pipelineFilterOpticalFlowV5, pipelineName.c_str(), dispatchX, dispatchY);
+        }
+
+        if (level > 0)
+        {
+            context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_1 + level - 1] = context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW];
+
+            // Scale pass: reads the filtered surface B of this level and writes
+            // surface B of the next finer level (level - 1).
+            context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->resources[opticalFlowResourceIndexB + level];
+            context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_NEXT_LEVEL] = context->resources[opticalFlowResourceIndexB + level - 1];
+
+            FFX_ASSERT(opticalFlowBlockSize >= 2);
+            const uint32_t nextLevelWidth = opticalFlowTextureSizes[level - 1].width;
+            const uint32_t nextLevelHeight = opticalFlowTextureSizes[level - 1].height;
+
+            const uint32_t dispatchX = (nextLevelWidth + 3) / 4;
+            const uint32_t dispatchY = (nextLevelHeight + 3) / 4;
+            const uint32_t dispatchZ = 1;
+            const std::wstring pipelineName = L"OF " + std::to_wstring(level) + L" Scale";
+
+            scheduleDispatch(context, &context->pipelineScaleOpticalFlowAdvancedV5, pipelineName.c_str(), dispatchX, dispatchY, dispatchZ);
+
+            FfxGpuJobDescription barrierJob = {FFX_GPU_JOB_BARRIER};
+            barrierJob.barrierDescriptor = { context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0};
+            backend.fpScheduleGpuJob(&backend, &barrierJob);
+        }
+
+        // Return the surface that was read as "previous" to UAV state.
+        {
+            FfxGpuJobDescription barrierJob = {FFX_GPU_JOB_BARRIER};
+            barrierJob.barrierDescriptor = { context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0};
+            backend.fpScheduleGpuJob(&backend, &barrierJob);
+        }
+    }
+
+    // Transition every input surface of both sets from COMPUTE_READ back to
+    // UNORDERED_ACCESS, reusing one job object as the original code did.
+    {
+        FfxGpuJobDescription barrierJob = {FFX_GPU_JOB_BARRIER};
+
+        for (const InputSurface& surface : inputSurfaces) {
+            wcscpy_s(barrierJob.jobLabel, surface.transitionLabel);
+            barrierJob.barrierDescriptor = { context->resources[surface.id], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0};
+            backend.fpScheduleGpuJob(&backend, &barrierJob);
+        }
+    }
+
+    context->resourceFrameIndex = (context->resourceFrameIndex + 1) % FFX_OPTICALFLOW_MAX_QUEUED_FRAMES;
+
+    FFX_VALIDATE(backend.fpExecuteGpuJobs(&backend, commandList, context->effectContextId));
+
+    backend.fpUnregisterResources(&backend, commandList, context->effectContextId);
+
+    return FFX_OK;
+}
+
+FfxErrorCode ffxOpticalflowContextCreate(FfxOpticalflowContext* context, FfxOpticalflowContextDescription* contextDescription)
+{
+    // Neither the destination context nor its description may be NULL.
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(contextDescription, FFX_ERROR_INVALID_POINTER);
+
+    // The backend must supply these four entry points before a context can be created.
+    const auto& backend = contextDescription->backendInterface;
+    FFX_RETURN_ON_ERROR(backend.fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE);
+    FFX_RETURN_ON_ERROR(backend.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE);
+    FFX_RETURN_ON_ERROR(backend.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+    FFX_RETURN_ON_ERROR(backend.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+
+    // A scratch buffer, when provided, must come with a non-zero size.
+    if (backend.scratchBuffer) {
+        FFX_RETURN_ON_ERROR(backend.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE);
+    }
+
+    // The public context is opaque storage that must be able to hold the private context.
+    FFX_STATIC_ASSERT(sizeof(FfxOpticalflowContext) >= sizeof(FfxOpticalflowContext_Private));
+    return opticalflowCreate((FfxOpticalflowContext_Private*)(context), contextDescription);
+}
+
+FFX_API FfxErrorCode ffxOpticalflowContextGetGpuMemoryUsage(FfxOpticalflowContext* context, FfxEffectMemoryUsage* vramUsage)
+{
+    // Both the context and the output structure are required.
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER);
+
+    // The public context is opaque storage for the private context; it must own a device.
+    FfxOpticalflowContext_Private* self = (FfxOpticalflowContext_Private*)(context);
+    FFX_RETURN_ON_ERROR(self->device, FFX_ERROR_NULL_DEVICE);
+
+    // Forward the query to the backend; its error code (FFX_OK on success) is returned as-is.
+    auto& backend = self->contextDescription.backendInterface;
+    return backend.fpGetEffectGpuMemoryUsage(&backend, self->effectContextId, vramUsage);
+}
+
+FfxErrorCode ffxOpticalflowContextDestroy(FfxOpticalflowContext* context)
+{
+    // A NULL context cannot be released.
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+
+    // The public context is opaque storage for the private context; reinterpret it
+    // and hand it to opticalflowRelease, whose result is returned unchanged.
+    return opticalflowRelease((FfxOpticalflowContext_Private*)(context));
+}
+
+FFX_API bool ffxOpticalflowResourceIsNull(FfxResource resource)
+{
+    return !resource.resource; // true when the wrapped resource handle compares equal to NULL
+}
+
+FFX_API FfxErrorCode ffxOpticalflowGetSharedResourceDescriptions(FfxOpticalflowContext* context, FfxOpticalflowSharedResourceDescriptions* SharedResources)
+{
+ FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+ FFX_RETURN_ON_ERROR(SharedResources, FFX_ERROR_INVALID_POINTER);
+ // Fills SharedResources->opticalFlowVector and ->opticalFlowSCD with texture creation descriptions.
+ FfxOpticalflowContext_Private* contextPrivate = (FfxOpticalflowContext_Private*)(context);
+ const FfxDimensions2D opticalFlowTextureSize = GetOpticalFlowTextureSize(contextPrivate->contextDescription.resolution, 8); // NOTE(review): 8 is presumably the block size in pixels - confirm
+ const FfxDimensions2D globalMotionSearchMaxDispatchSize = GetGlobalMotionSearchDispatchSize(0);
+ const uint32_t globalMotionSearchTextureWidth = 4 /* predefined slots */ + (globalMotionSearchMaxDispatchSize.width * globalMotionSearchMaxDispatchSize.height); // NOTE(review): not read anywhere in this function
+ // Optical flow vectors: 2D R16G16_SINT texture with UAV usage, sized from the context resolution.
+ SharedResources->opticalFlowVector = {
+ FFX_HEAP_TYPE_DEFAULT,
+ { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowTextureSize.width, opticalFlowTextureSize.height, 1, 1, FFX_RESOURCE_FLAGS_NONE, FFX_RESOURCE_USAGE_UAV },
+ FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"OPTICALFLOW_Result", 0, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} };
+ // Scene change detection output: 3x1 2D R32_UINT texture with UAV usage.
+ SharedResources->opticalFlowSCD = {
+ FFX_HEAP_TYPE_DEFAULT,
+ { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R32_UINT, 3, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, FFX_RESOURCE_USAGE_UAV },
+ FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"OPTICALFLOW_SCDOutput", 0, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} };
+
+ return FFX_OK;
+}
+
+FfxErrorCode ffxOpticalflowContextDispatch(FfxOpticalflowContext* context, const FfxOpticalflowDispatchDescription* dispatchParams)
+{
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(dispatchParams, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(dispatchParams->commandList, FFX_ERROR_INVALID_POINTER);
+    // The color input must be a non-null 2D texture; both output resources must be non-null.
+    const auto& color = dispatchParams->color;
+    FFX_RETURN_ON_ERROR(!ffxOpticalflowResourceIsNull(color), FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(color.description.type == FFX_RESOURCE_TYPE_TEXTURE2D, FFX_ERROR_INVALID_ARGUMENT);
+    FFX_RETURN_ON_ERROR(!ffxOpticalflowResourceIsNull(dispatchParams->opticalFlowVector), FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(!ffxOpticalflowResourceIsNull(dispatchParams->opticalFlowSCD), FFX_ERROR_INVALID_POINTER);
+
+    // The context needs a device, and the input may not exceed the resolution stored in the context.
+    FfxOpticalflowContext_Private* self = (FfxOpticalflowContext_Private*)(context);
+    FFX_RETURN_ON_ERROR(self->device, FFX_ERROR_NULL_DEVICE);
+    FFX_RETURN_ON_ERROR(color.description.width <= self->contextDescription.resolution.width, FFX_ERROR_INVALID_ARGUMENT);
+    FFX_RETURN_ON_ERROR(color.description.height <= self->contextDescription.resolution.height, FFX_ERROR_INVALID_ARGUMENT);
+    return dispatch(self, dispatchParams);
+}
+
+FFX_API FfxVersionNumber ffxOpticalflowGetEffectVersion()
+{
+ return FFX_SDK_MAKE_VERSION(FFX_OPTICALFLOW_VERSION_MAJOR, FFX_OPTICALFLOW_VERSION_MINOR, FFX_OPTICALFLOW_VERSION_PATCH); // built from the OpticalFlow major/minor/patch defines, not from the SDK version
+}
diff --git a/thirdparty/amd-ffx/ffx_opticalflow.h b/thirdparty/amd-ffx/ffx_opticalflow.h
new file mode 100644
index 000000000000..772bf7c71243
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_opticalflow.h
@@ -0,0 +1,212 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// @defgroup OpticalFlow
+
+#pragma once
+
+// Include the interface for the backend of the OpticalFlow API.
+#include "ffx_interface.h"
+
+/// FidelityFX OpticalFlow major version.
+///
+/// @ingroup ffxOpticalflow
+#define FFX_OPTICALFLOW_VERSION_MAJOR (1)
+
+/// FidelityFX OpticalFlow minor version.
+///
+/// @ingroup ffxOpticalflow
+#define FFX_OPTICALFLOW_VERSION_MINOR (1)
+
+/// FidelityFX OpticalFlow patch version.
+///
+/// @ingroup ffxOpticalflow
+#define FFX_OPTICALFLOW_VERSION_PATCH (2)
+
+/// FidelityFX Optical Flow context count
+///
+/// Defines the number of internal effect contexts required by Optical Flow
+///
+/// @ingroup ffxOpticalflow
+#define FFX_OPTICALFLOW_CONTEXT_COUNT (1)
+
+/// The size of the context specified in 32bit size units.
+///
+/// @ingroup ffxOpticalflow
+#define FFX_OPTICALFLOW_CONTEXT_SIZE (FFX_SDK_DEFAULT_CONTEXT_SIZE)
+
+#if defined(__cplusplus)
+extern "C" {
+#endif // #if defined(__cplusplus)
+
+/// An enumeration of all the passes which constitute the OpticalFlow algorithm.
+///
+/// @ingroup ffxOpticalflow
+typedef enum FfxOpticalflowPass
+{
+ FFX_OPTICALFLOW_PASS_PREPARE_LUMA = 0, ///< First pass identifier; the remaining values follow consecutively.
+ FFX_OPTICALFLOW_PASS_GENERATE_OPTICAL_FLOW_INPUT_PYRAMID,
+ FFX_OPTICALFLOW_PASS_GENERATE_SCD_HISTOGRAM,
+ FFX_OPTICALFLOW_PASS_COMPUTE_SCD_DIVERGENCE,
+ FFX_OPTICALFLOW_PASS_COMPUTE_OPTICAL_FLOW_ADVANCED_V5,
+ FFX_OPTICALFLOW_PASS_FILTER_OPTICAL_FLOW_V5,
+ FFX_OPTICALFLOW_PASS_SCALE_OPTICAL_FLOW_ADVANCED_V5,
+
+ FFX_OPTICALFLOW_PASS_COUNT ///< Number of passes; not a pass itself.
+} FfxOpticalflowPass;
+
+/// An enumeration of bit flags used when creating a
+/// FfxOpticalflowContext. See FfxOpticalflowContextDescription.
+///
+/// @ingroup ffxOpticalflow
+typedef enum FfxOpticalflowInitializationFlagBits
+{
+ FFX_OPTICALFLOW_ENABLE_TEXTURE1D_USAGE = (1 << 0), ///< NOTE(review): presumably allows 1D textures for internal resources - confirm against the context creation code.
+
+} FfxOpticalflowInitializationFlagBits;
+
+/// A structure encapsulating the parameters required to initialize
+/// FidelityFX OpticalFlow.
+///
+/// @ingroup ffxOpticalflow
+typedef struct FfxOpticalflowContextDescription {
+
+ FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FidelityFX SDK
+ uint32_t flags; ///< A collection of FfxOpticalflowInitializationFlagBits.
+ FfxDimensions2D resolution; ///< Upper bound on the color input size accepted by ffxOpticalflowContextDispatch; also sizes the shared optical flow vector texture.
+} FfxOpticalflowContextDescription;
+
+/// A structure encapsulating the parameters for dispatching the various passes
+/// of FidelityFX Opticalflow.
+///
+/// @ingroup ffxOpticalflow
+typedef struct FfxOpticalflowDispatchDescription
+{
+ FfxCommandList commandList; ///< The FfxCommandList to record rendering commands into.
+ FfxResource color; ///< A FfxResource containing the input color buffer; must be a 2D texture no larger than the context resolution.
+ FfxResource opticalFlowVector; ///< A FfxResource containing the output motion buffer
+ FfxResource opticalFlowSCD; ///< A FfxResource containing the output scene change detection buffer
+ bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously.
+ int backbufferTransferFunction; ///< NOTE(review): presumably identifies the transfer function of the color input; OpticalflowConstants has a field of the same name - confirm.
+ FfxFloatCoords2D minMaxLuminance; ///< NOTE(review): presumably the minimum and maximum luminance of the color input; OpticalflowConstants has a field of the same name - confirm.
+} FfxOpticalflowDispatchDescription;
+
+typedef struct FfxOpticalflowSharedResourceDescriptions {
+ // Creation descriptions filled in by ffxOpticalflowGetSharedResourceDescriptions.
+ FfxCreateResourceDescription opticalFlowVector; ///< Description of the R16G16_SINT optical flow vector texture.
+ FfxCreateResourceDescription opticalFlowSCD; ///< Description of the 3x1 R32_UINT scene change detection texture.
+
+} FfxOpticalflowSharedResourceDescriptions;
+
+/// A structure encapsulating the FidelityFX OpticalFlow context.
+///
+/// This sets up an object which contains all persistent internal data and
+/// resources that are required by OpticalFlow.
+///
+/// The FfxOpticalflowContext object should have a lifetime matching
+/// your use of OpticalFlow. Before destroying the OpticalFlow context care should be taken
+/// to ensure the GPU is not accessing the resources created or used by OpticalFlow.
+/// It is therefore recommended that the GPU is idle before destroying the
+/// OpticalFlow context.
+///
+/// @ingroup ffxOpticalflow
+typedef struct FfxOpticalflowContext
+{
+ uint32_t data[FFX_OPTICALFLOW_CONTEXT_SIZE]; ///< An opaque set of uint32_t which contain the data for the context.
+} FfxOpticalflowContext;
+
+
+/// Create a FidelityFX OpticalFlow context from the parameters
+/// programmed to the FfxOpticalflowContextDescription structure.
+///
+/// The context structure is the main object used to interact with the OpticalFlow
+/// API, and is responsible for the management of the internal resources used
+/// by the OpticalFlow algorithm. When this API is called, multiple calls will be
+/// made via the pointers contained in the backendInterface structure.
+/// These callbacks will attempt to retrieve the device capabilities, and
+/// create the internal resources, and pipelines required by OpticalFlow's
+/// frame-to-frame function. Depending on the precise configuration used when
+/// creating the FfxOpticalflowContext a different set of resources and
+/// pipelines might be requested via the callback functions.
+///
+/// The flags included in the flags field of
+/// FfxOpticalflowContextDescription must match the configuration of your
+/// application as well as the intended use of OpticalFlow. It is important that these
+/// flags are set correctly (as well as a correctly programmed
+/// FfxOpticalflowContextDescription) to ensure correct operation. It is
+/// recommended to consult the overview documentation for further details on
+/// how OpticalFlow should be integrated into an application.
+///
+/// When the FfxOpticalflowContext is created, you should use the
+/// ffxOpticalflowContextDispatch function each frame for which the
+/// OpticalFlow passes should be dispatched. See the documentation of
+/// ffxOpticalflowContextDispatch for more details.
+///
+/// The FfxOpticalflowContext should be destroyed when use of it is
+/// completed, typically when an application is unloaded or OpticalFlow is
+/// disabled by a user. To destroy the OpticalFlow context you should call
+/// ffxOpticalflowContextDestroy.
+///
+/// @param [out] context A pointer to a FfxOpticalflowContext structure to populate.
+/// @param [in] contextDescription A pointer to a FfxOpticalflowContextDescription structure.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_INVALID_POINTER The operation failed because either context or contextDescription was NULL.
+/// @retval
+/// FFX_ERROR_INCOMPLETE_INTERFACE The operation failed because the FfxOpticalflowContextDescription.backendInterface was not fully specified.
+/// @retval
+/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend.
+///
+/// @ingroup ffxOpticalflow
+FFX_API FfxErrorCode ffxOpticalflowContextCreate(FfxOpticalflowContext* context, FfxOpticalflowContextDescription* contextDescription);
+/// Writes the backend-reported GPU memory usage of the context to vramUsage; NULL arguments yield FFX_ERROR_INVALID_POINTER, a context without a device FFX_ERROR_NULL_DEVICE.
+FFX_API FfxErrorCode ffxOpticalflowContextGetGpuMemoryUsage(FfxOpticalflowContext* pContext, FfxEffectMemoryUsage* vramUsage);
+/// Fills SharedResources with the creation descriptions of the optical flow vector and scene change detection textures; NULL arguments yield FFX_ERROR_INVALID_POINTER.
+FFX_API FfxErrorCode ffxOpticalflowGetSharedResourceDescriptions(FfxOpticalflowContext* context, FfxOpticalflowSharedResourceDescriptions* SharedResources);
+/// Validates dispatchDescription (command list, 2D color input within the context resolution, both output resources) and then dispatches the OpticalFlow work.
+FFX_API FfxErrorCode ffxOpticalflowContextDispatch(FfxOpticalflowContext* context, const FfxOpticalflowDispatchDescription* dispatchDescription);
+
+/// Destroy the FidelityFX OpticalFlow context.
+///
+/// @param [out] context A pointer to a FfxOpticalflowContext structure to destroy.
+///
+/// @retval
+/// FFX_OK The operation completed successfully.
+/// @retval
+/// FFX_ERROR_INVALID_POINTER The operation failed because context was NULL.
+///
+/// @ingroup ffxOpticalflow
+FFX_API FfxErrorCode ffxOpticalflowContextDestroy(FfxOpticalflowContext* context);
+
+/// Queries the effect version number.
+///
+/// @returns
+/// The OpticalFlow effect version built from FFX_OPTICALFLOW_VERSION_MAJOR, _MINOR and _PATCH.
+///
+/// @ingroup ffxOpticalflow
+FFX_API FfxVersionNumber ffxOpticalflowGetEffectVersion();
+
+#if defined(__cplusplus)
+}
+#endif // #if defined(__cplusplus)
diff --git a/thirdparty/amd-ffx/ffx_opticalflow_private.h b/thirdparty/amd-ffx/ffx_opticalflow_private.h
new file mode 100644
index 000000000000..a325288755d9
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_opticalflow_private.h
@@ -0,0 +1,108 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#define FFX_CPU
+#include "gpu/opticalflow/ffx_opticalflow_resources.h"
+
+typedef enum OpticalFlowBindingIdentifiers
+{
+ FFX_OF_BINDING_IDENTIFIER_NULL = 0, // Value zero; the identifiers below are numbered consecutively from it.
+ FFX_OF_BINDING_IDENTIFIER_INPUT_COLOR,
+
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_1,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_2,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_3,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_4,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_5,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_6,
+
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS_INPUT,
+
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT,
+
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_NEXT_LEVEL,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS,
+
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_1,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_2,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_3,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_4,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_5,
+ FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_6,
+
+ FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_VECTOR,
+ FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_SCD_OUTPUT,
+
+ FFX_OF_BINDING_IDENTIFIER_COUNT // Number of identifiers; sizes the srvBindings and uavBindings arrays of FfxOpticalflowContext_Private.
+} OpticalFlowBindingIdentifiers;
+
+typedef enum OpticalflowShaderPermutationOptions
+{
+ OPTICALFLOW_SHADER_PERMUTATION_FORCE_WAVE64 = (1 << 0), ///< doesn't map to a define, selects different table
+ OPTICALFLOW_SHADER_PERMUTATION_ALLOW_FP16 = (1 << 1), ///< Enables fast math computations where possible
+ OPTICALFLOW_HDR_COLOR_INPUT = (1 << 2), ///< NOTE(review): presumably selects the permutation for HDR color input - confirm
+} OpticalflowShaderPermutationOptions;
+
+typedef struct OpticalflowConstants // NOTE(review): presumably mirrors a shader-side constant buffer, so field order and sizes matter - confirm
+{
+ int32_t inputLumaResolution[2]; // NOTE(review): presumably width then height - confirm
+ uint32_t opticalFlowPyramidLevel;
+ uint32_t opticalFlowPyramidLevelCount;
+
+ int32_t frameIndex;
+ uint32_t backbufferTransferFunction; // Same name as the FfxOpticalflowDispatchDescription field, but unsigned here.
+ float minMaxLuminance[2]; // Same name as the FfxOpticalflowDispatchDescription field.
+} OpticalflowConstants;
+
+typedef struct FfxOpticalflowContext_Private // Overlaid on the opaque FfxOpticalflowContext storage via a pointer cast.
+{
+ FfxOpticalflowContextDescription contextDescription; // Holds the backend interface used for backend calls and the resolution checked on dispatch.
+ FfxUInt32 effectContextId; // Identifier passed to backend calls such as fpGetEffectGpuMemoryUsage and fpExecuteGpuJobs.
+ OpticalflowConstants constants;
+ FfxDevice device; // Must be non-NULL for dispatch and memory usage queries (FFX_ERROR_NULL_DEVICE otherwise).
+ FfxDeviceCapabilities deviceCapabilities;
+
+ FfxPipelineState pipelinePrepareLuma; // Seven pipelines follow, named after the seven FfxOpticalflowPass values.
+ FfxPipelineState pipelineGenerateOpticalFlowInputPyramid;
+ FfxPipelineState pipelineGenerateSCDHistogram;
+ FfxPipelineState pipelineComputeSCDDivergence;
+ FfxPipelineState pipelineComputeOpticalFlowAdvancedV5;
+ FfxPipelineState pipelineFilterOpticalFlowV5;
+ FfxPipelineState pipelineScaleOpticalFlowAdvancedV5;
+
+ FfxResourceInternal resources[FFX_OF_RESOURCE_IDENTIFIER_COUNT];
+ FfxResourceInternal srvBindings[FFX_OF_BINDING_IDENTIFIER_COUNT];
+ FfxResourceInternal uavBindings[FFX_OF_BINDING_IDENTIFIER_COUNT];
+
+ FfxConstantBuffer constantBuffers[FFX_OPTICALFLOW_CONSTANTBUFFER_COUNT];
+
+ bool firstExecution;
+ bool refreshPipelineStates;
+ uint32_t resourceFrameIndex; // Incremented modulo FFX_OPTICALFLOW_MAX_QUEUED_FRAMES before the queued GPU jobs are executed.
+} FfxOpticalflowContext_Private;
diff --git a/thirdparty/amd-ffx/ffx_types.h b/thirdparty/amd-ffx/ffx_types.h
new file mode 100644
index 000000000000..dfd6ef8e045f
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_types.h
@@ -0,0 +1,1319 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include
+
+///
+/// @defgroup ffxSDK SDK
+/// The SDK module provides detailed descriptions of the various class, structs, and function which comprise the FidelityFX SDK. It is divided into several sub-modules.
+///
+
+/// @defgroup ffxHost Host
+/// The FidelityFX SDK host (CPU-side) references
+///
+/// @ingroup ffxSDK
+
+/// @defgroup Defines Defines
+/// Top level defines used by the FidelityFX SDK
+///
+/// @ingroup ffxHost
+
+// When defining custom mutex you have to also define:
+// FFX_MUTEX_LOCK - for exclusive locking of mutex
+// FFX_MUTEX_LOCK_SHARED - for shared locking of mutex
+// FFX_MUTEX_UNLOCK - for exclusive unlocking of mutex
+// FFX_MUTEX_UNLOCK_SHARED - for shared unlocking of mutex
+//
+// If your mutex type doesn't support shared locking mechanism you can rely
+// on exclusive locks only (define _SHARED variants to the same exclusive operation).
+#ifndef FFX_MUTEX
+#if __cplusplus >= 201703L
+#include
+/// FidelityFX mutex wrapper.
+///
+/// @ingroup SDKTypes
+#define FFX_MUTEX std::shared_mutex
+#define FFX_MUTEX_IMPL_SHARED
+#else
+#include
+/// FidelityFX mutex wrapper.
+///
+/// @ingroup SDKTypes
+#define FFX_MUTEX std::mutex
+#define FFX_MUTEX_IMPL_STANDARD
+#endif // #if __cplusplus >= 201703L
+#endif // #ifndef FFX_MUTEX
+
+#if defined(FFX_GCC) || !defined(FFX_BUILD_AS_DLL)
+/// FidelityFX exported functions
+///
+/// @ingroup Defines
+#define FFX_API
+#else
+/// FidelityFX exported functions
+///
+/// @ingroup Defines
+#define FFX_API __declspec(dllexport)
+#endif // #if defined (FFX_GCC)
+
+// GODOT BEGINS
+// On non-Windows platforms `wchar_t` is 32 bits rather than 16 bits,
+// so we increased the size of the context (counted in 32-bit units) by 2x.
+#define FFX_SDK_DEFAULT_CONTEXT_SIZE (1024 * 256)
+// GODOT ENDS
+
+/// Maximum supported number of simultaneously bound SRVs.
+///
+/// @ingroup Defines
+#define FFX_MAX_NUM_SRVS 64
+
+/// Maximum supported number of simultaneously bound UAVs.
+///
+/// @ingroup Defines
+#define FFX_MAX_NUM_UAVS 64
+
+/// Maximum number of constant buffers bound.
+///
+/// @ingroup Defines
+#define FFX_MAX_NUM_CONST_BUFFERS 3
+
+/// Maximum number of characters in a resource name
+///
+/// @ingroup Defines
+#define FFX_RESOURCE_NAME_SIZE 64
+
+/// Maximum number of queued frames in the backend
+///
+/// @ingroup Defines
+#define FFX_MAX_QUEUED_FRAMES (4)
+
+/// Maximum number of resources per effect context
+///
+/// @ingroup Defines
+#define FFX_MAX_RESOURCE_COUNT (512)
+
+/// Maximum number of passes per effect component
+///
+/// @ingroup Defines
+#define FFX_MAX_PASS_COUNT (50)
+
+/// Total number of descriptors in ring buffer needed for a single effect context
+///
+/// @ingroup Defines
+#define FFX_RING_BUFFER_DESCRIPTOR_COUNT (FFX_MAX_QUEUED_FRAMES * FFX_MAX_PASS_COUNT * FFX_MAX_RESOURCE_COUNT)
+
+/// Size of constant buffer entry in the ring buffer table
+///
+/// @ingroup Defines
+#define FFX_BUFFER_SIZE (4096)
+
+/// Total constant buffer ring buffer size for a single effect context
+///
+/// @ingroup Defines
+#define FFX_CONSTANT_BUFFER_RING_BUFFER_SIZE (FFX_MAX_QUEUED_FRAMES * FFX_MAX_PASS_COUNT * FFX_BUFFER_SIZE)
+
+/// Maximum number of barriers per flush
+///
+/// @ingroup Defines
+#define FFX_MAX_BARRIERS (128)
+
+/// Maximum number of GPU jobs per submission
+///
+/// @ingroup Defines
+#define FFX_MAX_GPU_JOBS (256)
+
+/// Maximum number of samplers supported
+///
+/// @ingroup Defines
+#define FFX_MAX_SAMPLERS (16)
+
+/// Maximum number of simultaneous upload jobs
+///
+/// @ingroup Defines
+#define UPLOAD_JOB_COUNT (16)
+
+// GODOT BEGINS
+
+// Disable warnings also for non-MSVC compilers
+#if defined(_MSC_VER) && !defined(__clang__)
+// Off by default warnings
+#pragma warning(disable : 4365 4710 4820 5039)
+#else
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wmissing-braces"
+#pragma GCC diagnostic ignored "-Wswitch"
+#endif
+
+// GODOT ENDS
+
+#ifdef __cplusplus
+extern "C" {
+#endif // #ifdef __cplusplus
+
+/// @defgroup CPUTypes CPU Types
+/// CPU side type defines for all commonly used variables
+///
+/// @ingroup ffxHost
+
+/// A typedef for version numbers returned from functions in the FidelityFX SDK.
+///
+/// @ingroup CPUTypes
+ typedef uint32_t FfxVersionNumber;
+
+/// A typedef for a boolean value.
+///
+/// @ingroup CPUTypes
+typedef bool FfxBoolean;
+
+/// A typedef for a unsigned 8bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint8_t FfxUInt8;
+
+/// A typedef for a unsigned 16bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint16_t FfxUInt16;
+
+/// A typedef for a unsigned 32bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32;
+
+/// A typedef for a unsigned 64bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint64_t FfxUInt64;
+
+/// A typedef for a signed 8bit integer.
+///
+/// @ingroup CPUTypes
+typedef int8_t FfxInt8;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup CPUTypes
+typedef int16_t FfxInt16;
+
+/// A typedef for a signed 32bit integer.
+///
+/// @ingroup CPUTypes
+typedef int32_t FfxInt32;
+
+/// A typedef for a signed 64bit integer.
+///
+/// @ingroup CPUTypes
+typedef int64_t FfxInt64;
+
+/// A typedef for a floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32;
+
+/// A typedef for a 2-dimensional floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x2[2];
+
+/// A typedef for a 3-dimensional floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x3[3];
+
+/// A typedef for a 4-dimensional floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x4[4];
+
+/// A typedef for a 4x4 floating point matrix.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x4x4[16];
+
+/// A typedef for a 2-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32x2[2];
+
+/// A typedef for a 3-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32x3[3];
+
+/// A typedef for a 4-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32x4[4];
+
+/// A typedef for a 2-dimensional 32bit signed integer.
+///
+/// @ingroup CPUTypes
+typedef int32_t FfxInt32x2[2];
+
+/// A typedef for a 3-dimensional 32bit signed integer.
+///
+/// @ingroup CPUTypes
+typedef int32_t FfxInt32x3[3];
+
+/// A typedef for a 4-dimensional 32bit signed integer.
+///
+/// @ingroup CPUTypes
+typedef int32_t FfxInt32x4[4];
+
+/// @defgroup SDKTypes SDK Types
+/// Structure and enumeration definitions used by the FidelityFX SDK
+///
+/// @ingroup ffxHost
+
+
+/// An enumeration of surface formats. Needs to match enum FfxApiSurfaceFormat
+///
+/// @ingroup SDKTypes
+typedef enum FfxSurfaceFormat {
+
+ FFX_SURFACE_FORMAT_UNKNOWN, ///< Unknown format
+ FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS, ///< 32 bit per channel, 4 channel typeless format
+ FFX_SURFACE_FORMAT_R32G32B32A32_UINT, ///< 32 bit per channel, 4 channel uint format
+ FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, ///< 32 bit per channel, 4 channel float format
+ FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, ///< 16 bit per channel, 4 channel float format
+ FFX_SURFACE_FORMAT_R32G32B32_FLOAT, ///< 32 bit per channel, 3 channel float format
+ FFX_SURFACE_FORMAT_R32G32_FLOAT, ///< 32 bit per channel, 2 channel float format
+ FFX_SURFACE_FORMAT_R8_UINT, ///< 8 bit per channel, 1 channel unsigned int format
+ FFX_SURFACE_FORMAT_R32_UINT, ///< 32 bit per channel, 1 channel unsigned int format
+ FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS, ///< 8 bit per channel, 4 channel typeless format
+ FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, ///< 8 bit per channel, 4 channel unsigned normalized format
+ FFX_SURFACE_FORMAT_R8G8B8A8_SNORM, ///< 8 bit per channel, 4 channel signed normalized format
+ FFX_SURFACE_FORMAT_R8G8B8A8_SRGB, ///< 8 bit per channel, 4 channel srgb normalized
+ FFX_SURFACE_FORMAT_B8G8R8A8_TYPELESS, ///< 8 bit per channel, 4 channel typeless format
+ FFX_SURFACE_FORMAT_B8G8R8A8_UNORM, ///< 8 bit per channel, 4 channel unsigned normalized format
+ FFX_SURFACE_FORMAT_B8G8R8A8_SRGB, ///< 8 bit per channel, 4 channel srgb normalized
+ FFX_SURFACE_FORMAT_R11G11B10_FLOAT, ///< 32 bit 3 channel float format
+ FFX_SURFACE_FORMAT_R10G10B10A2_UNORM, ///< 10 bit per 3 channel, 2 bit for 1 channel normalized format
+ FFX_SURFACE_FORMAT_R16G16_FLOAT, ///< 16 bit per channel, 2 channel float format
+ FFX_SURFACE_FORMAT_R16G16_UINT, ///< 16 bit per channel, 2 channel unsigned int format
+ FFX_SURFACE_FORMAT_R16G16_SINT, ///< 16 bit per channel, 2 channel signed int format
+ FFX_SURFACE_FORMAT_R16_FLOAT, ///< 16 bit per channel, 1 channel float format
+ FFX_SURFACE_FORMAT_R16_UINT, ///< 16 bit per channel, 1 channel unsigned int format
+ FFX_SURFACE_FORMAT_R16_UNORM, ///< 16 bit per channel, 1 channel unsigned normalized format
+ FFX_SURFACE_FORMAT_R16_SNORM, ///< 16 bit per channel, 1 channel signed normalized format
+ FFX_SURFACE_FORMAT_R8_UNORM, ///< 8 bit per channel, 1 channel unsigned normalized format
+ FFX_SURFACE_FORMAT_R8G8_UNORM, ///< 8 bit per channel, 2 channel unsigned normalized format
+ FFX_SURFACE_FORMAT_R8G8_UINT, ///< 8 bit per channel, 2 channel unsigned integer format
+ FFX_SURFACE_FORMAT_R32_FLOAT, ///< 32 bit per channel, 1 channel float format
+ FFX_SURFACE_FORMAT_R9G9B9E5_SHAREDEXP, ///< 9 bit per channel, 5 bit exponent format
+
+ FFX_SURFACE_FORMAT_R16G16B16A16_TYPELESS, ///< 16 bit per channel, 4 channel typeless format
+ FFX_SURFACE_FORMAT_R32G32_TYPELESS, ///< 32 bit per channel, 2 channel typeless format
+ FFX_SURFACE_FORMAT_R10G10B10A2_TYPELESS, ///< 10 bit per 3 channel, 2 bit for 1 channel typeless format
+ FFX_SURFACE_FORMAT_R16G16_TYPELESS, ///< 16 bit per channel, 2 channel typeless format
+ FFX_SURFACE_FORMAT_R16_TYPELESS, ///< 16 bit per channel, 1 channel typeless format
+ FFX_SURFACE_FORMAT_R8_TYPELESS, ///< 8 bit per channel, 1 channel typeless format
+ FFX_SURFACE_FORMAT_R8G8_TYPELESS, ///< 8 bit per channel, 2 channel typeless format
+ FFX_SURFACE_FORMAT_R32_TYPELESS, ///< 32 bit per channel, 1 channel typeless format
+} FfxSurfaceFormat;
+
+typedef enum FfxIndexFormat
+{
+ FFX_INDEX_TYPE_UINT32,
+ FFX_INDEX_TYPE_UINT16
+} FfxIndexFormat;
+
+/// An enumeration of resource usage.
+///
+/// @ingroup SDKTypes
+typedef enum FfxResourceUsage {
+
+ FFX_RESOURCE_USAGE_READ_ONLY = 0, ///< No usage flags indicate a resource is read only.
+ FFX_RESOURCE_USAGE_RENDERTARGET = (1<<0), ///< Indicates a resource will be used as render target.
+ FFX_RESOURCE_USAGE_UAV = (1<<1), ///< Indicates a resource will be used as UAV.
+ FFX_RESOURCE_USAGE_DEPTHTARGET = (1<<2), ///< Indicates a resource will be used as depth target.
+ FFX_RESOURCE_USAGE_INDIRECT = (1<<3), ///< Indicates a resource will be used as indirect argument buffer
+ FFX_RESOURCE_USAGE_ARRAYVIEW = (1<<4), ///< Indicates a resource that will generate array views. Works on 2D and cubemap textures
+ FFX_RESOURCE_USAGE_STENCILTARGET = (1<<5), ///< Indicates a resource will be used as stencil target.
+ FFX_RESOURCE_USAGE_DCC_RENDERTARGET = (1<<15), ///< Indicates a resource that should specify optimal render target memory access flags (for console use)
+} FfxResourceUsage;
+
+/// An enumeration of resource states. Each base state is a single bit; combined states are bitwise ORs of base states.
+///
+/// @ingroup SDKTypes
+typedef enum FfxResourceStates {
+
+ FFX_RESOURCE_STATE_COMMON = (1 << 0), ///< NOTE(review): not documented here; presumably a general-purpose (common) state — confirm against the backend.
+ FFX_RESOURCE_STATE_UNORDERED_ACCESS = (1 << 1), ///< Indicates a resource is in the state to be used as UAV.
+ FFX_RESOURCE_STATE_COMPUTE_READ = (1 << 2), ///< Indicates a resource is in the state to be read by compute shaders.
+ FFX_RESOURCE_STATE_PIXEL_READ = (1 << 3), ///< Indicates a resource is in the state to be read by pixel shaders.
+ FFX_RESOURCE_STATE_PIXEL_COMPUTE_READ = (FFX_RESOURCE_STATE_PIXEL_READ | FFX_RESOURCE_STATE_COMPUTE_READ), ///< Indicates a resource is in the state to be read by pixel or compute shaders.
+ FFX_RESOURCE_STATE_COPY_SRC = (1 << 4), ///< Indicates a resource is in the state to be used as source in a copy command.
+ FFX_RESOURCE_STATE_COPY_DEST = (1 << 5), ///< Indicates a resource is in the state to be used as destination in a copy command.
+ FFX_RESOURCE_STATE_GENERIC_READ = (FFX_RESOURCE_STATE_COPY_SRC | FFX_RESOURCE_STATE_COMPUTE_READ), ///< Indicates a resource is in generic (slow) read state.
+ FFX_RESOURCE_STATE_INDIRECT_ARGUMENT = (1 << 6), ///< Indicates a resource is in the state to be used as an indirect command argument.
+ FFX_RESOURCE_STATE_PRESENT = (1 << 7), ///< Indicates a resource is in the state to be used to present to the swap chain.
+ FFX_RESOURCE_STATE_RENDER_TARGET = (1 << 8), ///< Indicates a resource is in the state to be used as render target.
+ FFX_RESOURCE_STATE_DEPTH_ATTACHEMENT = (1 << 9), ///< Indicates a resource is in the state to be used as depth attachment (the identifier is spelled "ATTACHEMENT").
+} FfxResourceStates;
+
+/// An enumeration of surface dimensions.
+///
+/// @ingroup SDKTypes
+typedef enum FfxResourceDimension {
+
+ FFX_RESOURCE_DIMENSION_TEXTURE_1D, ///< A resource with a single dimension.
+ FFX_RESOURCE_DIMENSION_TEXTURE_2D, ///< A resource with two dimensions.
+} FfxResourceDimension;
+
+/// An enumeration of resource view dimensions.
+///
+/// @ingroup SDKTypes
+typedef enum FfxResourceViewDimension
+{
+ FFX_RESOURCE_VIEW_DIMENSION_BUFFER, ///< A resource view on a buffer.
+ FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_1D, ///< A resource view on a single dimension.
+ FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_1D_ARRAY, ///< A resource view on a single dimensional array.
+ FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_2D, ///< A resource view on two dimensions.
+ FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_2D_ARRAY, ///< A resource view on two dimensional array.
+ FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_3D, ///< A resource view on three dimensions.
+} FfxResourceViewDimension;
+
+/// An enumeration of resource flags.
+///
+/// @ingroup SDKTypes
+typedef enum FfxResourceFlags {
+
+ FFX_RESOURCE_FLAGS_NONE = 0, ///< No flags.
+ FFX_RESOURCE_FLAGS_ALIASABLE = (1 << 0), ///< A bit indicating a resource does not need to persist across frames.
+ FFX_RESOURCE_FLAGS_UNDEFINED = (1 << 1), ///< Special case flag used internally when importing resources that require additional setup.
+} FfxResourceFlags;
+
+/// An enumeration of all resource view types.
+///
+/// @ingroup SDKTypes
+typedef enum FfxResourceViewType {
+
+ FFX_RESOURCE_VIEW_UNORDERED_ACCESS, ///< The resource view is an unordered access view (UAV).
+ FFX_RESOURCE_VIEW_SHADER_READ, ///< The resource view is a shader resource view (SRV).
+} FfxResourceViewType;
+
+/// The type of filtering to perform when reading a texture.
+///
+/// @ingroup SDKTypes
+typedef enum FfxFilterType {
+
+ FFX_FILTER_TYPE_MINMAGMIP_POINT, ///< Point sampling.
+ FFX_FILTER_TYPE_MINMAGMIP_LINEAR, ///< Sampling with interpolation.
+ FFX_FILTER_TYPE_MINMAGLINEARMIP_POINT, ///< Use linear interpolation for minification and magnification; use point sampling for mip-level sampling.
+} FfxFilterType;
+
+/// The address mode used when reading a texture.
+///
+/// @ingroup SDKTypes
+typedef enum FfxAddressMode {
+
+ FFX_ADDRESS_MODE_WRAP, ///< Wrap when reading texture.
+ FFX_ADDRESS_MODE_MIRROR, ///< Mirror when reading texture.
+ FFX_ADDRESS_MODE_CLAMP, ///< Clamp when reading texture.
+ FFX_ADDRESS_MODE_BORDER, ///< Border color when reading texture.
+ FFX_ADDRESS_MODE_MIRROR_ONCE, ///< Mirror once when reading texture.
+} FfxAddressMode;
+
+/// An enumeration of all supported shader models.
+///
+/// @ingroup SDKTypes
+typedef enum FfxShaderModel {
+
+ FFX_SHADER_MODEL_5_1, ///< Shader model 5.1.
+ FFX_SHADER_MODEL_6_0, ///< Shader model 6.0.
+ FFX_SHADER_MODEL_6_1, ///< Shader model 6.1.
+ FFX_SHADER_MODEL_6_2, ///< Shader model 6.2.
+ FFX_SHADER_MODEL_6_3, ///< Shader model 6.3.
+ FFX_SHADER_MODEL_6_4, ///< Shader model 6.4.
+ FFX_SHADER_MODEL_6_5, ///< Shader model 6.5.
+ FFX_SHADER_MODEL_6_6, ///< Shader model 6.6.
+ FFX_SHADER_MODEL_6_7, ///< Shader model 6.7.
+} FfxShaderModel;
+
+/// An enumeration for different resource types.
+///
+/// @ingroup SDKTypes
+typedef enum FfxResourceType {
+
+ FFX_RESOURCE_TYPE_BUFFER, ///< The resource is a buffer.
+ FFX_RESOURCE_TYPE_TEXTURE1D, ///< The resource is a 1-dimensional texture.
+ FFX_RESOURCE_TYPE_TEXTURE2D, ///< The resource is a 2-dimensional texture.
+ FFX_RESOURCE_TYPE_TEXTURE_CUBE, ///< The resource is a cube map.
+ FFX_RESOURCE_TYPE_TEXTURE3D, ///< The resource is a 3-dimensional texture.
+} FfxResourceType;
+
+/// An enumeration for different heap types
+///
+/// @ingroup SDKTypes
+typedef enum FfxHeapType {
+
+ FFX_HEAP_TYPE_DEFAULT = 0, ///< Local memory.
+ FFX_HEAP_TYPE_UPLOAD, ///< Heap used for uploading resources.
+ FFX_HEAP_TYPE_READBACK ///< Heap used for reading back resources.
+} FfxHeapType;
+
+/// An enumeration for different render job types.
+///
+/// @ingroup SDKTypes
+typedef enum FfxGpuJobType {
+
+ FFX_GPU_JOB_CLEAR_FLOAT = 0, ///< The GPU job is performing a floating-point clear.
+ FFX_GPU_JOB_COPY = 1, ///< The GPU job is performing a copy.
+ FFX_GPU_JOB_COMPUTE = 2, ///< The GPU job is performing a compute dispatch.
+ FFX_GPU_JOB_BARRIER = 3, ///< The GPU job is performing a barrier.
+
+ FFX_GPU_JOB_DISCARD = 4, ///< The GPU job is performing a resource discard.
+
+} FfxGpuJobType;
+
+/// An enumeration for various descriptor types. NOTE(review): the typedef name below is spelled FfxDescriptiorType, while FfxStaticResourceDescription uses the enum tag FfxDescriptorType (which resolves as a type name only in C++).
+///
+/// @ingroup SDKTypes
+typedef enum FfxDescriptorType {
+
+ //FFX_DESCRIPTOR_CBV = 0, // All CBVs currently mapped to root signature
+ //FFX_DESCRIPTOR_SAMPLER, // All samplers currently static
+ FFX_DESCRIPTOR_TEXTURE_SRV = 0, ///< Texture shader resource view (SRV) descriptor.
+ FFX_DESCRIPTOR_BUFFER_SRV, ///< Buffer shader resource view (SRV) descriptor.
+ FFX_DESCRIPTOR_TEXTURE_UAV, ///< Texture unordered access view (UAV) descriptor.
+ FFX_DESCRIPTOR_BUFFER_UAV, ///< Buffer unordered access view (UAV) descriptor.
+} FfxDescriptiorType;
+
+/// An enumeration for view binding stages. Each stage is a distinct bit.
+///
+/// @ingroup SDKTypes
+typedef enum FfxBindStage {
+
+ FFX_BIND_PIXEL_SHADER_STAGE = 1 << 0, ///< Bit for the pixel shader stage.
+ FFX_BIND_VERTEX_SHADER_STAGE = 1 << 1, ///< Bit for the vertex shader stage.
+ FFX_BIND_COMPUTE_SHADER_STAGE = 1 << 2, ///< Bit for the compute shader stage.
+
+} FfxBindStage;
+
+/// An enumeration for barrier types.
+///
+/// @ingroup SDKTypes
+typedef enum FfxBarrierType
+{
+ FFX_BARRIER_TYPE_TRANSITION = 0, ///< A transition barrier (presumably a resource state transition — confirm against the backend).
+ FFX_BARRIER_TYPE_UAV, ///< An unordered access view (UAV) barrier.
+} FfxBarrierType;
+
+typedef void (*ffxMessageCallback)(uint32_t type, const wchar_t* message); ///< Callback receiving a message type and a wide-character message string; type is presumably an FfxMsgType value (confirm with callers).
+
+/// An enumeration for message types that can be passed.
+///
+/// @ingroup SDKTypes
+typedef enum FfxMsgType {
+ FFX_MESSAGE_TYPE_ERROR = 0, ///< The message is an error.
+ FFX_MESSAGE_TYPE_WARNING = 1, ///< The message is a warning.
+ FFX_MESSAGE_TYPE_COUNT ///< Number of message types declared above (evaluates to 2); not itself a message type.
+} FfxMsgType;
+
+/// An enumeration of all the effects which constitute the FidelityFX SDK.
+///
+/// Dictates what effect shader blobs to fetch for pipeline creation
+///
+/// @ingroup SDKTypes
+typedef enum FfxEffect
+{
+
+ FFX_EFFECT_FSR2 = 0, ///< FidelityFX Super Resolution v2
+ FFX_EFFECT_FSR1, ///< FidelityFX Super Resolution
+ FFX_EFFECT_SPD, ///< FidelityFX Single Pass Downsampler
+ FFX_EFFECT_BLUR, ///< FidelityFX Blur
+ FFX_EFFECT_BREADCRUMBS, ///< FidelityFX Breadcrumbs
+ FFX_EFFECT_BRIXELIZER, ///< FidelityFX Brixelizer
+ FFX_EFFECT_BRIXELIZER_GI, ///< FidelityFX Brixelizer GI
+ FFX_EFFECT_CACAO, ///< FidelityFX Combined Adaptive Compute Ambient Occlusion
+ FFX_EFFECT_CAS, ///< FidelityFX Contrast Adaptive Sharpening
+ FFX_EFFECT_DENOISER, ///< FidelityFX Denoiser
+ FFX_EFFECT_LENS, ///< FidelityFX Lens
+ FFX_EFFECT_PARALLEL_SORT, ///< FidelityFX Parallel Sort
+ FFX_EFFECT_SSSR, ///< FidelityFX Stochastic Screen Space Reflections
+ FFX_EFFECT_VARIABLE_SHADING, ///< FidelityFX Variable Shading
+ FFX_EFFECT_LPM, ///< FidelityFX Luma Preserving Mapper
+ FFX_EFFECT_DOF, ///< FidelityFX Depth of Field
+ FFX_EFFECT_CLASSIFIER, ///< FidelityFX Classifier
+ FFX_EFFECT_FSR3UPSCALER, ///< FidelityFX Super Resolution v3
+ FFX_EFFECT_FRAMEINTERPOLATION, ///< FidelityFX Frame Interpolation, part of FidelityFX Super Resolution v3
+ FFX_EFFECT_OPTICALFLOW, ///< FidelityFX Optical Flow, part of FidelityFX Super Resolution v3
+
+ FFX_EFFECT_SHAREDRESOURCES = 127, ///< FidelityFX Shared resources effect ID
+ FFX_EFFECT_SHAREDAPIBACKEND = 128 ///< FidelityFX Shared backend context used with DLL API
+} FfxEffect;
+
+typedef enum FfxBackbufferTransferFunction { // Backbuffer transfer function selector; meanings below are inferred from the enumerator names — confirm.
+ FFX_BACKBUFFER_TRANSFER_FUNCTION_SRGB, ///< Presumably the sRGB transfer function.
+ FFX_BACKBUFFER_TRANSFER_FUNCTION_PQ, ///< Presumably the PQ (perceptual quantizer) transfer function.
+ FFX_BACKBUFFER_TRANSFER_FUNCTION_SCRGB ///< Presumably the scRGB transfer function.
+} FfxBackbufferTransferFunction;
+
+/// A typedef representing the graphics device.
+///
+/// @ingroup SDKTypes
+typedef void* FfxDevice;
+
+typedef void* FfxCommandQueue; ///< A typedef (opaque pointer) representing a command queue.
+
+typedef void* FfxSwapchain; ///< A typedef (opaque pointer) representing a swapchain.
+
+/// A typedef representing a command list or command buffer.
+///
+/// @ingroup SDKTypes
+typedef void* FfxCommandList;
+
+/// A typedef for a root signature.
+///
+/// @ingroup SDKTypes
+typedef void* FfxRootSignature;
+
+/// A typedef for a command signature, used for indirect workloads
+///
+/// @ingroup SDKTypes
+typedef void* FfxCommandSignature;
+
+/// A typedef for a pipeline state object.
+///
+/// @ingroup SDKTypes
+typedef void* FfxPipeline;
+
+/// Allocate block of memory.
+///
+/// The callback function for requesting memory of provided size.
+/// size cannot be 0.
+///
+/// @param [in] size Size in bytes of memory to allocate.
+///
+/// @retval
+/// NULL The operation failed.
+/// @retval
+/// Anything else The operation completed successfully.
+///
+/// @ingroup SDKTypes
+typedef void* (*FfxAllocFunc)(
+ size_t size);
+
+/// Reallocate block of memory.
+///
+/// The callback function for reallocating provided block of memory to new location
+/// with specified size. When provided with NULL as ptr
+/// then it should behave as FfxAllocFunc.
+/// If the operation fails then the contents of ptr
+/// must be left unchanged. size cannot be 0.
+///
+/// @param [in] ptr A pointer to previous block of memory.
+/// @param [in] size Size in bytes of memory to allocate.
+///
+/// @retval
+/// NULL The operation failed.
+/// @retval
+/// Anything else The operation completed successfully.
+///
+/// @ingroup SDKTypes
+typedef void* (*FfxReallocFunc)(
+ void* ptr,
+ size_t size);
+
+/// Free block of memory.
+///
+/// The callback function for freeing provided block of memory.
+/// ptr cannot be NULL.
+///
+/// @param [in] ptr A pointer to block of memory.
+///
+/// @ingroup SDKTypes
+typedef void (*FfxFreeFunc)(
+ void* ptr);
+
+/// A structure encapsulating a set of allocation callbacks.
+///
+/// @ingroup SDKTypes
+typedef struct FfxAllocationCallbacks {
+
+ FfxAllocFunc fpAlloc; ///< Callback for allocating memory in the library.
+ FfxReallocFunc fpRealloc; ///< Callback for reallocating memory in the library.
+ FfxFreeFunc fpFree; ///< Callback for freeing allocated memory in the library.
+} FfxAllocationCallbacks;
+
+/// A structure encapsulating the bindless descriptor configuration of an effect.
+///
+/// @ingroup SDKTypes
+typedef struct FfxEffectBindlessConfig {
+ uint32_t maxTextureSrvs; ///< Maximum number of texture SRVs needed in the bindless table.
+ uint32_t maxBufferSrvs; ///< Maximum number of buffer SRVs needed in the bindless table.
+ uint32_t maxTextureUavs; ///< Maximum number of texture UAVs needed in the bindless table.
+ uint32_t maxBufferUavs; ///< Maximum number of buffer UAVs needed in the bindless table.
+} FfxEffectBindlessConfig;
+
+/// A structure encapsulating a collection of device capabilities.
+///
+/// @ingroup SDKTypes
+typedef struct FfxDeviceCapabilities {
+
+ FfxShaderModel maximumSupportedShaderModel; ///< The maximum shader model supported by the device.
+ uint32_t waveLaneCountMin; ///< The minimum supported wavefront width.
+ uint32_t waveLaneCountMax; ///< The maximum supported wavefront width.
+ bool fp16Supported; ///< The device supports FP16 in hardware.
+ bool raytracingSupported; ///< The device supports ray tracing.
+ bool deviceCoherentMemorySupported; ///< The device supports AMD coherent memory.
+ bool dedicatedAllocationSupported; ///< The device supports dedicated allocations for resources.
+ bool bufferMarkerSupported; ///< The device supports AMD buffer markers.
+ bool extendedSynchronizationSupported; ///< The device supports extended synchronization mechanism.
+ bool shaderStorageBufferArrayNonUniformIndexing; ///< The device supports shader storage buffer array non uniform indexing.
+} FfxDeviceCapabilities;
+
+/// A structure encapsulating a 2-dimensional extent (width and height), using 32bit unsigned integers.
+///
+/// @ingroup SDKTypes
+typedef struct FfxDimensions2D {
+
+ uint32_t width; ///< The width of a 2-dimensional range.
+ uint32_t height; ///< The height of a 2-dimensional range.
+} FfxDimensions2D;
+
+/// A structure encapsulating a 2-dimensional point.
+///
+/// @ingroup SDKTypes
+typedef struct FfxIntCoords2D {
+
+ int32_t x; ///< The x coordinate of a 2-dimensional point.
+ int32_t y; ///< The y coordinate of a 2-dimensional point.
+} FfxIntCoords2D;
+
+/// A structure encapsulating a 2-dimensional rect.
+///
+/// @ingroup SDKTypes
+typedef struct FfxRect2D
+{
+ int32_t left; ///< Left most coordinate
+ int32_t top; ///< Top most coordinate
+ int32_t width; ///< Rect width
+ int32_t height; ///< Rect height
+} FfxRect2D;
+
+/// A structure encapsulating a 2-dimensional set of floating point coordinates.
+///
+/// @ingroup SDKTypes
+typedef struct FfxFloatCoords2D {
+
+ float x; ///< The x coordinate of a 2-dimensional point.
+ float y; ///< The y coordinate of a 2-dimensional point.
+} FfxFloatCoords2D;
+
+/// A structure describing a resource.
+///
+/// @ingroup SDKTypes
+typedef struct FfxResourceDescription {
+
+ FfxResourceType type; ///< The type of the resource.
+ FfxSurfaceFormat format; ///< The surface format.
+ union {
+ uint32_t width; ///< The width of the texture resource.
+ uint32_t size; ///< The size of the buffer resource.
+ };
+
+ union {
+ uint32_t height; ///< The height of the texture resource.
+ uint32_t stride; ///< The stride of the buffer resource.
+ };
+
+ union {
+ uint32_t depth; ///< The depth of the texture resource.
+ uint32_t alignment; ///< The alignment of the buffer resource.
+ };
+
+ uint32_t mipCount; ///< Number of mips (or 0 for full mipchain).
+ FfxResourceFlags flags; ///< A set of FfxResourceFlags flags.
+ FfxResourceUsage usage; ///< Resource usage flags.
+} FfxResourceDescription;
+
+/// An outward facing structure containing a resource.
+///
+/// @ingroup SDKTypes
+typedef struct FfxResource {
+ void* resource; ///< pointer to the resource.
+ FfxResourceDescription description; ///< A description of the resource (see FfxResourceDescription).
+ FfxResourceStates state; ///< The state of the resource (see FfxResourceStates).
+ wchar_t name[FFX_RESOURCE_NAME_SIZE]; ///< (optional) Resource name.
+} FfxResource;
+
+/// A structure describing a static resource.
+///
+/// @ingroup SDKTypes
+typedef struct FfxStaticResourceDescription
+{
+ const FfxResource* resource; ///< The resource to register.
+ FfxDescriptorType descriptorType; ///< The type of descriptor to create.
+ uint32_t descriptorIndex; ///< The destination index of the descriptor within the static table.
+
+ union
+ {
+ uint32_t bufferOffset; ///< The buffer offset in bytes.
+ uint32_t textureUavMip; ///< The mip of the texture resource to create a UAV for.
+ };
+
+ uint32_t bufferSize; ///< The buffer size in bytes.
+ uint32_t bufferStride; ///< The buffer stride in bytes.
+} FfxStaticResourceDescription;
+
+/// A structure describing a constant buffer allocation. NOTE(review): the struct tag is FfxConstantAllocation but the typedef name is FfxRootConstantAllocation; FfxConstantBufferAllocator refers to the tag.
+///
+/// @ingroup SDKTypes
+typedef struct FfxConstantAllocation
+{
+ FfxResource resource; ///< The resource representing the constant buffer resource.
+ FfxUInt64 handle; ///< The binding handle for the constant buffer.
+
+} FfxRootConstantAllocation;
+
+/// A function definition for a constant buffer allocation callback
+///
+/// Used to provide a constant buffer allocator to the calling backend
+///
+/// @param [in] data The constant buffer data.
+/// @param [in] dataSize The size of the constant buffer data.
+///
+///
+/// @ingroup SDKTypes
+typedef FfxConstantAllocation(*FfxConstantBufferAllocator)(
+ void* data,
+ const FfxUInt64 dataSize);
+
+/// Information about single AMD FidelityFX Breadcrumbs Library GPU memory block.
+///
+/// @ingroup SDKTypes
+typedef struct FfxBreadcrumbsBlockData {
+ void* memory; ///< Pointer to CPU mapped GPU buffer memory.
+ void* heap; ///< GPU memory block handle.
+ void* buffer; ///< GPU buffer handle for memory block.
+ uint64_t baseAddress; ///< GPU address of memory block.
+ uint32_t nextMarker; ///< Index of next marker to be saved in memory block.
+} FfxBreadcrumbsBlockData;
+
+/// An internal structure containing a handle to a resource and resource views
+///
+/// @ingroup SDKTypes
+typedef struct FfxResourceInternal {
+ int32_t internalIndex; ///< The index of the resource.
+} FfxResourceInternal;
+
+/// An enumeration for resource init data types that can be passed.
+///
+/// @ingroup SDKTypes
+typedef enum FfxResourceInitDataType {
+ FFX_RESOURCE_INIT_DATA_TYPE_INVALID = 0, ///< Invalid type; this is the zero value.
+ FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED, ///< The resource stays uninitialized.
+ FFX_RESOURCE_INIT_DATA_TYPE_BUFFER, ///< The resource is initialized from a buffer.
+ FFX_RESOURCE_INIT_DATA_TYPE_VALUE, ///< The resource is filled with a single byte value.
+} FfxResourceInitDataType;
+
+/// A structure housing all that is needed for resource initialization.
+///
+/// @ingroup SDKTypes
+typedef struct FfxResourceInitData
+{
+ FfxResourceInitDataType type; ///< Indicates that the resource will be initialized from a buffer or a value, or stay uninitialized.
+ size_t size; ///< The size, in bytes, of the resource that needs to be initialized.
+ union
+ {
+ void* buffer; ///< The buffer used to initialize the resource.
+ unsigned char value; ///< Indicates that the resource will be filled up with this value.
+ };
+
+ static FfxResourceInitData FfxResourceInitValue(size_t dataSize, uint8_t initVal) // Returns init data of type FFX_RESOURCE_INIT_DATA_TYPE_VALUE with size = dataSize and value = initVal.
+ {
+ FfxResourceInitData initData = { FFX_RESOURCE_INIT_DATA_TYPE_VALUE }; // Only type is given explicitly; size and the union are set on the following lines.
+ initData.size = dataSize;
+ initData.value = initVal;
+ return initData;
+ }
+
+ static FfxResourceInitData FfxResourceInitBuffer(size_t dataSize, void* pInitData) // Returns init data of type FFX_RESOURCE_INIT_DATA_TYPE_BUFFER with size = dataSize and buffer = pInitData (the pointer is stored, not copied).
+ {
+ FfxResourceInitData initData = { FFX_RESOURCE_INIT_DATA_TYPE_BUFFER }; // Only type is given explicitly; size and the union are set on the following lines.
+ initData.size = dataSize;
+ initData.buffer = pInitData;
+ return initData;
+ }
+
+} FfxResourceInitData;
+
+/// An internal structure housing all that is needed for backend resource descriptions
+///
+/// @ingroup SDKTypes
+typedef struct FfxInternalResourceDescription {
+
+ uint32_t id; ///< Resource identifier
+ const wchar_t* name; ///< Name to set to the resource for easier debugging
+ FfxResourceType type; ///< The type of resource (see FfxResourceType)
+ FfxResourceUsage usage; ///< Resource usage flags (see FfxResourceUsage)
+ FfxSurfaceFormat format; ///< The resource format to use
+ uint32_t width; ///< The width (textures) or size (buffers) of the resource
+ uint32_t height; ///< The height (textures) or stride (buffers) of the resource
+ uint32_t mipCount; ///< Mip count (textures) of the resource
+ FfxResourceFlags flags; ///< Resource flags (see FfxResourceFlags)
+ FfxResourceInitData initData; ///< Resource initialization definition (see FfxResourceInitData)
+} FfxInternalResourceDescription;
+
+/// A structure defining the view to create.
+///
+/// @ingroup SDKTypes
+typedef struct FfxViewDescription
+{
+ bool uavView; ///< Indicates that the view is a UAV.
+ FfxResourceViewDimension viewDimension; ///< The view dimension to map.
+ union {
+ int32_t mipLevel; ///< The mip level of the view, (-1) for default.
+ int32_t firstElement; ///< The first element of a buffer view, (-1) for default.
+ };
+
+ union {
+ int32_t arraySize; ///< The array size of the view, (-1) for full depth/array size.
+ int32_t elementCount; ///< The number of elements in a buffer view, (-1) for full depth/array size.
+ };
+
+ int32_t firstSlice; ///< The first slice to map to, (-1) for default first slice.
+ wchar_t name[FFX_RESOURCE_NAME_SIZE]; ///< Name of the view.
+} FfxViewDescription;
+
+static FfxViewDescription s_FfxViewDescInit = { false, FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_2D, -1, -1, -1, L"" }; ///< Default view description: non-UAV 2D texture view with mipLevel, arraySize and firstSlice all -1 and an empty name.
+
+/// A structure defining a resource bind point
+///
+/// @ingroup SDKTypes
+typedef struct FfxResourceBinding
+{
+ uint32_t slotIndex; ///< The slot into which to bind the resource
+ uint32_t arrayIndex; ///< The resource offset for mip/array access
+ uint32_t resourceIdentifier; ///< A unique resource identifier representing an internal resource index
+ wchar_t name[FFX_RESOURCE_NAME_SIZE]; ///< A debug name to help track the resource binding
+}FfxResourceBinding;
+
+/// A structure encapsulating a single pass of an algorithm.
+///
+/// @ingroup SDKTypes
+typedef struct FfxPipelineState {
+
+ FfxRootSignature rootSignature; ///< The pipeline's root signature.
+ uint32_t passId; ///< The id of the effect pass this pipeline corresponds to.
+ FfxCommandSignature cmdSignature; ///< The command signature used for indirect workloads.
+ FfxPipeline pipeline; ///< The pipeline object.
+ uint32_t uavTextureCount; ///< Count of Texture UAVs used in this pipeline.
+ uint32_t srvTextureCount; ///< Count of Texture SRVs used in this pipeline.
+ uint32_t srvBufferCount; ///< Count of Buffer SRVs used in this pipeline.
+ uint32_t uavBufferCount; ///< Count of Buffer UAVs used in this pipeline.
+ uint32_t staticTextureSrvCount; ///< Count of static Texture SRVs used in this pipeline.
+ uint32_t staticBufferSrvCount; ///< Count of static Buffer SRVs used in this pipeline.
+ uint32_t staticTextureUavCount; ///< Count of static Texture UAVs used in this pipeline.
+ uint32_t staticBufferUavCount; ///< Count of static Buffer UAVs used in this pipeline.
+ uint32_t constCount; ///< Count of constant buffers used in this pipeline.
+
+ FfxResourceBinding uavTextureBindings[FFX_MAX_NUM_UAVS]; ///< Array of ResourceIdentifiers bound as texture UAVs.
+ FfxResourceBinding srvTextureBindings[FFX_MAX_NUM_SRVS]; ///< Array of ResourceIdentifiers bound as texture SRVs.
+ FfxResourceBinding srvBufferBindings[FFX_MAX_NUM_SRVS]; ///< Array of ResourceIdentifiers bound as buffer SRVs.
+ FfxResourceBinding uavBufferBindings[FFX_MAX_NUM_UAVS]; ///< Array of ResourceIdentifiers bound as buffer UAVs.
+ FfxResourceBinding constantBufferBindings[FFX_MAX_NUM_CONST_BUFFERS]; ///< Array of ResourceIdentifiers bound as CBs.
+
+ wchar_t name[FFX_RESOURCE_NAME_SIZE]; ///< Pipeline name for debugging/profiling purposes.
+} FfxPipelineState;
+
+/// A structure containing the data required to create a resource.
+///
+/// @ingroup SDKTypes
+typedef struct FfxCreateResourceDescription {
+
+ FfxHeapType heapType; ///< The heap type to hold the resource, typically FFX_HEAP_TYPE_DEFAULT.
+ FfxResourceDescription resourceDescription; ///< A resource description.
+ FfxResourceStates initialState; ///< The initial resource state.
+ const wchar_t* name; ///< Name of the resource.
+ uint32_t id; ///< Internal resource ID.
+ FfxResourceInitData initData; ///< A struct used to initialize the resource.
+} FfxCreateResourceDescription;
+
+/// A structure containing the data required to create sampler mappings.
+///
+/// @ingroup SDKTypes
+typedef struct FfxSamplerDescription {
+
+ FfxFilterType filter; ///< The filter type of the sampler (see FfxFilterType).
+ FfxAddressMode addressModeU; ///< The address mode for the U coordinate (see FfxAddressMode).
+ FfxAddressMode addressModeV; ///< The address mode for the V coordinate (see FfxAddressMode).
+ FfxAddressMode addressModeW; ///< The address mode for the W coordinate (see FfxAddressMode).
+ FfxBindStage stage; ///< The shader stage(s) the sampler is bound for (see FfxBindStage).
+} FfxSamplerDescription;
+
+/// A structure containing the data required to create root constant buffer mappings.
+///
+/// @ingroup SDKTypes
+typedef struct FfxRootConstantDescription
+{
+ uint32_t size; ///< Size of the root constant buffer. NOTE(review): units (bytes vs. 32-bit entries) are not stated here — confirm.
+ FfxBindStage stage; ///< The shader stage(s) the root constant buffer is bound for (see FfxBindStage).
+} FfxRootConstantDescription;
+
+/// A structure containing the description used to create a
+/// FfxPipeline structure.
+///
+/// A pipeline is the name given to a shader and the collection of state that
+/// is required to dispatch it. In the context of the FidelityFX SDK and its architecture
+/// this means that a FfxPipelineDescription will map to either a
+/// monolithic object in an explicit API (such as a
+/// PipelineStateObject in DirectX 12), or a shader and some
+/// ancillary API objects (in something like DirectX 11).
+///
+/// The contextFlags field contains a copy of the flags passed
+/// to ffxContextCreate via the flags field of
+/// the FfxInitializationParams structure. These flags are
+/// used to determine which permutation of a pipeline for a specific
+/// FfxPass should be used to implement the features required
+/// by each application, as well as to achieve the best performance on specific
+/// target hardware configurations.
+///
+/// When using one of the provided backends for FidelityFX SDK (such as DirectX 12 or
+/// Vulkan) the data required to create a pipeline is compiled off line and
+/// included into the backend library that you are using. For cases where the
+/// backend interface is overridden by providing custom callback function
+/// implementations care should be taken to respect the contents of the
+/// contextFlags field in order to correctly support the options
+/// provided by the FidelityFX SDK, and achieve best performance.
+///
+/// @ingroup SDKTypes
+typedef struct FfxPipelineDescription {
+
+ uint32_t contextFlags; ///< A collection of FfxInitializationFlagBits which were passed to the context.
+ const FfxSamplerDescription* samplers; ///< A collection of samplers to use when building the root signature for the pipeline.
+ size_t samplerCount; ///< Number of samplers to create for the pipeline.
+ const FfxRootConstantDescription* rootConstants; ///< A collection of root constant descriptions to use when building the root signature for the pipeline.
+ uint32_t rootConstantBufferCount; ///< Number of root constant buffers to create for the pipeline.
+ wchar_t name[64]; ///< Pipeline name with which to name the pipeline object (fixed 64 wide characters).
+ FfxBindStage stage; ///< The stage(s) for which this pipeline is being built.
+ uint32_t indirectWorkload; ///< Whether this pipeline has an indirect workload.
+ FfxSurfaceFormat backbufferFormat; ///< For raster pipelines this contains the backbuffer format.
+} FfxPipelineDescription;
+
+/// A structure containing the data required to create a barrier
+///
+/// @ingroup SDKTypes
+typedef struct FfxBarrierDescription
+{
+ FfxResourceInternal resource; ///< The resource representation
+ FfxBarrierType barrierType; ///< The type of barrier to execute
+ FfxResourceStates currentState; ///< The initial state of the resource
+ FfxResourceStates newState; ///< The new state of the resource after barrier
+ uint32_t subResourceID; ///< The subresource id to apply barrier operation to
+} FfxBarrierDescription;
+
+
+/// A structure containing a constant buffer.
+///
+/// @ingroup SDKTypes
+typedef struct FfxConstantBuffer {
+
+ uint32_t num32BitEntries; ///< The size (expressed in 32-bit chunks) stored in data.
+ uint32_t* data; ///< Pointer to constant buffer data
+}FfxConstantBuffer;
+
+/// A structure containing a texture shader resource view (SRV).
+typedef struct FfxTextureSRV
+{
+ FfxResourceInternal resource; ///< Resource corresponding to the shader resource view.
+#ifdef FFX_DEBUG
+ wchar_t name[FFX_RESOURCE_NAME_SIZE]; ///< Name of the view; only present when FFX_DEBUG is defined.
+#endif
+} FfxTextureSRV;
+
+/// A structure containing a buffer shader resource view (SRV).
+typedef struct FfxBufferSRV
+{
+ uint32_t offset; ///< Offset of resource to bind in bytes.
+ uint32_t size; ///< Size of resource to bind in bytes.
+ uint32_t stride; ///< Stride of resource to bind in bytes.
+ FfxResourceInternal resource; ///< Resource corresponding to the shader resource view.
+#ifdef FFX_DEBUG
+ wchar_t name[FFX_RESOURCE_NAME_SIZE]; ///< Name of the view; only present when FFX_DEBUG is defined.
+#endif
+} FfxBufferSRV;
+
+/// A structure containing a texture unordered access view (UAV).
+typedef struct FfxTextureUAV
+{
+ uint32_t mip; ///< Mip level of resource to bind.
+ FfxResourceInternal resource; ///< Resource corresponding to the unordered access view.
+#ifdef FFX_DEBUG
+ wchar_t name[FFX_RESOURCE_NAME_SIZE]; ///< Name of the view; only present when FFX_DEBUG is defined.
+#endif
+} FfxTextureUAV;
+
+/// A structure containing a buffer unordered access view (UAV).
+typedef struct FfxBufferUAV
+{
+ uint32_t offset; ///< Offset of resource to bind in bytes.
+ uint32_t size; ///< Size of resource to bind in bytes.
+ uint32_t stride; ///< Stride of resource to bind in bytes.
+ FfxResourceInternal resource; ///< Resource corresponding to the unordered access view.
+#ifdef FFX_DEBUG
+ wchar_t name[FFX_RESOURCE_NAME_SIZE]; ///< Name of the view; only present when FFX_DEBUG is defined.
+#endif
+} FfxBufferUAV;
+
+/// A structure describing a clear render job.
+///
+/// @ingroup SDKTypes
+typedef struct FfxClearFloatJobDescription {
+
+ float color[4]; ///< The clear color of the resource.
+ FfxResourceInternal target; ///< The resource to be cleared.
+} FfxClearFloatJobDescription;
+
+/// A structure describing a compute render job.
+///
+/// @ingroup SDKTypes
+typedef struct FfxComputeJobDescription {
+
+ FfxPipelineState pipeline; ///< Compute pipeline for the render job.
+ uint32_t dimensions[3]; ///< Dispatch dimensions.
+ FfxResourceInternal cmdArgument; ///< Dispatch indirect cmd argument buffer
+ uint32_t cmdArgumentOffset; ///< Dispatch indirect offset within the cmd argument buffer
+ FfxTextureSRV srvTextures[FFX_MAX_NUM_SRVS]; ///< SRV texture resources to be bound in the compute job.
+ FfxBufferSRV srvBuffers[FFX_MAX_NUM_SRVS]; ///< SRV buffer resources to be bound in the compute job.
+ FfxTextureUAV uavTextures[FFX_MAX_NUM_UAVS]; ///< UAV texture resources to be bound in the compute job.
+ FfxBufferUAV uavBuffers[FFX_MAX_NUM_UAVS]; ///< UAV buffer resources to be bound in the compute job.
+
+ FfxConstantBuffer cbs[FFX_MAX_NUM_CONST_BUFFERS]; ///< Constant buffers to be bound in the compute job.
+#ifdef FFX_DEBUG
+ wchar_t cbNames[FFX_MAX_NUM_CONST_BUFFERS][FFX_RESOURCE_NAME_SIZE];
+#endif
+} FfxComputeJobDescription;
+
+typedef struct FfxRasterJobDescription // A structure describing a raster render job.
+{
+ FfxPipelineState pipeline; ///< Raster pipeline for the render job.
+ uint32_t numVertices; ///< Number of vertices for the raster job (presumably the vertex count of the draw — confirm).
+ FfxResourceInternal renderTarget; ///< Render target resource for the raster job.
+ FfxTextureSRV srvTextures[FFX_MAX_NUM_SRVS]; ///< SRV texture resources to be bound in the raster job.
+ FfxTextureUAV uavTextures[FFX_MAX_NUM_UAVS]; ///< UAV texture resources to be bound in the raster job.
+
+ FfxConstantBuffer cbs[FFX_MAX_NUM_CONST_BUFFERS]; ///< Constant buffers to be bound in the raster job.
+#ifdef FFX_DEBUG
+ wchar_t cbNames[FFX_MAX_NUM_CONST_BUFFERS][FFX_RESOURCE_NAME_SIZE]; ///< Names of the constant buffers; only present when FFX_DEBUG is defined.
+#endif
+} FfxRasterJobDescription;
+
+/// A structure describing a copy render job.
+///
+/// @ingroup SDKTypes
+typedef struct FfxCopyJobDescription
+{
+ FfxResourceInternal src; ///< Source resource for the copy.
+ uint32_t srcOffset; ///< Offset into the source buffer in bytes.
+ FfxResourceInternal dst; ///< Destination resource for the copy.
+ uint32_t dstOffset; ///< Offset into the destination buffer in bytes.
+ uint32_t size; ///< Number of bytes to copy (Set to 0 to copy entire buffer).
+} FfxCopyJobDescription;
+
+typedef struct FfxDiscardJobDescription { // A structure describing a discard render job.
+
+ FfxResourceInternal target; ///< The resource to be discarded.
+} FfxDiscardJobDescription;
+
+/// A structure describing a single render job.
+///
+/// @ingroup SDKTypes
+typedef struct FfxGpuJobDescription{
+
+ FfxGpuJobType jobType; ///< Type of the job.
+ wchar_t jobLabel[FFX_RESOURCE_NAME_SIZE]; ///< Job label for markers.
+
+ union {
+ FfxClearFloatJobDescription clearJobDescriptor; ///< Clear job descriptor. Valid when jobType is FFX_GPU_JOB_CLEAR_FLOAT.
+ FfxCopyJobDescription copyJobDescriptor; ///< Copy job descriptor. Valid when jobType is FFX_GPU_JOB_COPY.
+ FfxComputeJobDescription computeJobDescriptor; ///< Compute job descriptor. Valid when jobType is FFX_GPU_JOB_COMPUTE.
+ FfxRasterJobDescription rasterJobDescriptor; ///< Raster job descriptor. NOTE(review): FfxGpuJobType declares no raster job type — confirm how this member is selected.
+ FfxBarrierDescription barrierDescriptor; ///< Barrier descriptor. Presumably valid when jobType is FFX_GPU_JOB_BARRIER.
+ FfxDiscardJobDescription discardJobDescriptor; ///< Discard job descriptor. Presumably valid when jobType is FFX_GPU_JOB_DISCARD.
+ };
+} FfxGpuJobDescription;
+
+#if defined(POPULATE_SHADER_BLOB_FFX)
+#undef POPULATE_SHADER_BLOB_FFX
+#endif // #if defined(POPULATE_SHADER_BLOB_FFX)
+
+/// Macro that expands to a brace-enclosed initializer list copying the shader blob fields of info[index] into the SDK's structural representation.
+///
+/// @ingroup SDKTypes
+#define POPULATE_SHADER_BLOB_FFX(info, index) \
+ { \
+ info[index].blobData, \
+ info[index].blobSize, \
+ info[index].numConstantBuffers, \
+ info[index].numSRVTextures, \
+ info[index].numUAVTextures, \
+ info[index].numSRVBuffers, \
+ info[index].numUAVBuffers, \
+ info[index].numSamplers, \
+ info[index].numRTAccelerationStructures, \
+ info[index].constantBufferNames, \
+ info[index].constantBufferBindings, \
+ info[index].constantBufferCounts, \
+ info[index].constantBufferSpaces, \
+ info[index].srvTextureNames, \
+ info[index].srvTextureBindings, \
+ info[index].srvTextureCounts, \
+ info[index].srvTextureSpaces, \
+ info[index].uavTextureNames, \
+ info[index].uavTextureBindings, \
+ info[index].uavTextureCounts, \
+ info[index].uavTextureSpaces, \
+ info[index].srvBufferNames, \
+ info[index].srvBufferBindings, \
+ info[index].srvBufferCounts, \
+ info[index].srvBufferSpaces, \
+ info[index].uavBufferNames, \
+ info[index].uavBufferBindings, \
+ info[index].uavBufferCounts, \
+ info[index].uavBufferSpaces, \
+ info[index].samplerNames, \
+ info[index].samplerBindings, \
+ info[index].samplerCounts, \
+ info[index].samplerSpaces, \
+ info[index].rtAccelerationStructureNames, \
+ info[index].rtAccelerationStructureBindings, \
+ info[index].rtAccelerationStructureCounts, \
+ info[index].rtAccelerationStructureSpaces \
+ }
+
+/// A single shader blob and a description of its resources.
+/// For each resource class there is a count plus parallel arrays of names, bindings, counts and spaces.
+/// @ingroup SDKTypes
+typedef struct FfxShaderBlob {
+
+ const uint8_t* data; ///< A pointer to the blob
+ const uint32_t size; ///< Size in bytes.
+
+ const uint32_t cbvCount; ///< Number of CBs.
+ const uint32_t srvTextureCount; ///< Number of SRV Textures.
+ const uint32_t uavTextureCount; ///< Number of UAV Textures.
+ const uint32_t srvBufferCount; ///< Number of SRV Buffers.
+ const uint32_t uavBufferCount; ///< Number of UAV Buffers.
+ const uint32_t samplerCount; ///< Number of Samplers.
+ const uint32_t rtAccelStructCount; ///< Number of RT Acceleration structures.
+
+ // constant buffers
+ const char** boundConstantBufferNames; ///< Pointer to an array of bound ConstantBuffer names.
+ const uint32_t* boundConstantBuffers; ///< Pointer to an array of bound ConstantBuffers.
+ const uint32_t* boundConstantBufferCounts; ///< Pointer to an array of bound ConstantBuffer resource counts
+ const uint32_t* boundConstantBufferSpaces; ///< Pointer to an array of bound ConstantBuffer resource spaces
+
+ // srv textures
+ const char** boundSRVTextureNames; ///< Pointer to an array of bound SRV texture names.
+ const uint32_t* boundSRVTextures; ///< Pointer to an array of bound SRV resources.
+ const uint32_t* boundSRVTextureCounts; ///< Pointer to an array of bound SRV resource counts
+ const uint32_t* boundSRVTextureSpaces; ///< Pointer to an array of bound SRV resource spaces
+
+ // uav textures
+ const char** boundUAVTextureNames; ///< Pointer to an array of bound UAV texture names.
+ const uint32_t* boundUAVTextures; ///< Pointer to an array of bound UAV texture resources.
+ const uint32_t* boundUAVTextureCounts; ///< Pointer to an array of bound UAV texture resource counts
+ const uint32_t* boundUAVTextureSpaces; ///< Pointer to an array of bound UAV texture resource spaces
+
+ // srv buffers
+ const char** boundSRVBufferNames; ///< Pointer to an array of bound SRV buffer names.
+ const uint32_t* boundSRVBuffers; ///< Pointer to an array of bound SRV buffer resources.
+ const uint32_t* boundSRVBufferCounts; ///< Pointer to an array of bound SRV buffer resource counts
+ const uint32_t* boundSRVBufferSpaces; ///< Pointer to an array of bound SRV buffer resource spaces
+
+ // uav buffers
+ const char** boundUAVBufferNames; ///< Pointer to an array of bound UAV buffer names.
+ const uint32_t* boundUAVBuffers; ///< Pointer to an array of bound UAV buffer resources.
+ const uint32_t* boundUAVBufferCounts; ///< Pointer to an array of bound UAV buffer resource counts
+ const uint32_t* boundUAVBufferSpaces; ///< Pointer to an array of bound UAV buffer resource spaces
+
+ // samplers
+ const char** boundSamplerNames; ///< Pointer to an array of bound sampler names.
+ const uint32_t* boundSamplers; ///< Pointer to an array of bound sampler resources.
+ const uint32_t* boundSamplerCounts; ///< Pointer to an array of bound sampler resource counts
+ const uint32_t* boundSamplerSpaces; ///< Pointer to an array of bound sampler resource spaces
+
+ // rt acceleration structures
+ const char** boundRTAccelerationStructureNames; ///< Pointer to an array of bound RT acceleration structure names.
+ const uint32_t* boundRTAccelerationStructures; ///< Pointer to an array of bound RT acceleration structure resources.
+ const uint32_t* boundRTAccelerationStructureCounts; ///< Pointer to an array of bound RT acceleration structure resource counts
+ const uint32_t* boundRTAccelerationStructureSpaces; ///< Pointer to an array of bound RT acceleration structure resource spaces
+
+} FfxShaderBlob;
+
+/// A structure describing the parameters passed from the
+/// presentation thread to the ui composition callback function.
+///
+/// @ingroup SDKTypes
+typedef struct FfxPresentCallbackDescription
+{
+ FfxDevice device; ///< The active device
+ FfxCommandList commandList; ///< The command list on which to register render commands
+ FfxResource currentBackBuffer; ///< The backbuffer resource with scene information
+ FfxResource currentUI; ///< Optional UI texture (when doing backbuffer + ui blend)
+ FfxResource outputSwapChainBuffer; ///< The swapchain target into which to render ui composition
+ bool isInterpolatedFrame; ///< Whether this is an interpolated or real frame
+ bool usePremulAlpha; ///< Toggles whether UI gets premultiplied alpha blending or not
+ uint64_t frameID; ///< Frame identifier. NOTE(review): which frame counter this refers to is not stated here - confirm.
+} FfxPresentCallbackDescription;
+
+/// A structure describing the parameters to pass to frame generation passes.
+///
+/// @ingroup SDKTypes
+typedef struct FfxFrameGenerationDispatchDescription {
+ FfxCommandList commandList; ///< The command list on which to register render commands
+ FfxResource presentColor; ///< The current presentation color, this will be used as interpolation source data.
+ FfxResource outputs[4]; ///< Interpolation destination targets (1 for each frame in numInterpolatedFrames); the array holds at most 4 targets.
+ uint32_t numInterpolatedFrames; ///< The number of frames to interpolate from the passed in color target
+ bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously.
+ FfxBackbufferTransferFunction backBufferTransferFunction; ///< The transfer function used to convert interpolation source color data to linear RGB.
+ float minMaxLuminance[2]; ///< Min and max luminance values, used when converting HDR colors to linear RGB
+ FfxRect2D interpolationRect; ///< The area of the backbuffer that should be used for interpolation in case only a part of the screen is used e.g. due to movie bars
+ uint64_t frameID; ///< Frame identifier. NOTE(review): which frame counter this refers to is not stated here - confirm.
+} FfxFrameGenerationDispatchDescription;
+
+//struct definition matches FfxApiEffectMemoryUsage
+typedef struct FfxEffectMemoryUsage
+{
+ uint64_t totalUsageInBytes; // Total memory usage, in bytes.
+ uint64_t aliasableUsageInBytes; // Aliasable memory usage, in bytes. NOTE(review): presumably a subset of the total - confirm.
+} FfxEffectMemoryUsage;
+
+//struct definition matches FfxApiSwapchainFramePacingTuning
+typedef struct FfxSwapchainFramePacingTuning
+{
+ float safetyMarginInMs; // Safety margin, in milliseconds.
+ float varianceFactor; // Valid range [0.0, 1.0].
+ bool allowHybridSpin; // Allows the pacing spinlock to sleep.
+ uint32_t hybridSpinTime; // How long to spin when hybridSpin is enabled, measured in timer resolution units. Going below 2 is not recommended: it results in frequent overshoots.
+ bool allowWaitForSingleObjectOnFence; // Allows calling WaitForSingleObject() instead of spinning on the fence value.
+} FfxSwapchainFramePacingTuning;
+
+#ifdef __cplusplus
+}
+#endif // #ifdef __cplusplus
diff --git a/thirdparty/amd-ffx/ffx_util.h b/thirdparty/amd-ffx/ffx_util.h
new file mode 100644
index 000000000000..b9531b019a52
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_util.h
@@ -0,0 +1,190 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "ffx_types.h"
+
+/// @defgroup Utils Utilities
+/// Utility Macros used by the FidelityFX SDK
+///
+/// @ingroup ffxHost
+
+/// The value of Pi, stored as a single-precision float.
+///
+/// @ingroup Utils
+const float FFX_PI = 3.141592653589793f;
+
+/// An epsilon value (1e-06) for floating point comparisons.
+///
+/// @ingroup Utils
+const float FFX_EPSILON = 1e-06f;
+
+/// Helper macro to create the version number.
+/// Combines (major << 22) | (minor << 12) | patch; every argument is parenthesized so compound expressions pack correctly.
+/// @ingroup Utils
+#define FFX_MAKE_VERSION(major, minor, patch) (((major) << 22) | ((minor) << 12) | (patch))
+
+/// Use this to specify no version.
+///
+/// @ingroup Utils
+#define FFX_UNSPECIFIED_VERSION 0xFFFFAD00
+
+/// Helper macro to avoid warnings about unused variables.
+/// Expands to a cast of x to void, so the value is discarded.
+/// @ingroup Utils
+#define FFX_UNUSED(x) ((void)(x))
+
+/// Helper macro to align an integer to the specified power of 2 boundary
+/// y must be a power of two (it is turned into a bit mask) and is evaluated twice, so avoid side effects in it.
+/// @ingroup Utils
+#define FFX_ALIGN_UP(x, y) (((x) + ((y)-1)) & ~((y)-1))
+
+/// Helper macro to check if a value is aligned.
+/// NOTE(review): as written this is true when x is non-zero and has more than one bit set (i.e. is not a power of two); confirm this matches the intended meaning of "aligned".
+/// @ingroup Utils
+#define FFX_IS_ALIGNED(x) (((x) != 0) && ((x) & ((x)-1)))
+
+/// Helper macro to compute the rounded-up integer division of two unsigned integers
+/// Arguments are parenthesized so compound expressions divide correctly; y is evaluated twice and must be non-zero.
+/// @ingroup Utils
+#define FFX_DIVIDE_ROUNDING_UP(x, y) (((x) + (y) - 1) / (y))
+
+/// Helper macro to stringify a value.
+/// Goes through FFX_XSTR so that a macro passed as s is expanded before being turned into a string literal.
+/// @ingroup Utils
+#define FFX_STR(s) FFX_XSTR(s)
+#define FFX_XSTR(s) #s
+
+/// Helper macro to forward declare a structure.
+/// Expands to `typedef struct x x` without a trailing semicolon; the caller supplies it.
+/// @ingroup Utils
+#define FFX_FORWARD_DECLARE(x) typedef struct x x
+
+/// Helper macro to return the maximum of two values.
+/// Returns y when the values compare equal; one argument is evaluated twice, so avoid side effects.
+/// @ingroup Utils
+#define FFX_MAXIMUM(x, y) (((x) > (y)) ? (x) : (y))
+
+/// Helper macro to return the minimum of two values.
+/// Returns y when the values compare equal; one argument is evaluated twice, so avoid side effects.
+/// @ingroup Utils
+#define FFX_MINIMUM(x, y) (((x) < (y)) ? (x) : (y))
+
+/// Helper macro to do safe free on a pointer.
+/// If x is non-null, calls freeFunc(x) and then assigns nullptr to x; x must be a modifiable lvalue and is evaluated more than once.
+/// @ingroup Utils
+#define FFX_SAFE_FREE(x, freeFunc) \
+ do { \
+ if (x) \
+ { \
+ freeFunc(x); \
+ x = nullptr; \
+ } \
+ } while (false)
+
+/// Helper macro to return the abs of an integer value.
+/// x is evaluated twice, so avoid side effects.
+/// @ingroup Utils
+#define FFX_ABSOLUTE(x) (((x) < 0) ? (-(x)) : (x))
+
+/// Helper macro to return sign of a value.
+/// Yields -1 for negative values and 1 otherwise, so zero maps to 1.
+/// @ingroup Utils
+#define FFX_SIGN(x) (((x) < 0) ? -1 : 1)
+
+/// Helper macro to work out the number of elements in an array.
+/// The result is cast to int32_t. The divisor becomes zero when sizeof(x) is not a multiple of the element size, so some misuse (e.g. passing a pointer) is meant to be diagnosed.
+/// @ingroup Utils
+#define FFX_ARRAY_ELEMENTS(x) (int32_t)((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))
+
+/// The maximum length of a path that can be specified to the FidelityFX API.
+/// NOTE(review): 260 equals the Windows MAX_PATH value; confirm whether the terminator is counted.
+/// @ingroup Utils
+#define FFX_MAXIMUM_PATH (260)
+
+/// Helper macro to check if the specified key is set in a bitfield.
+/// True only when every bit of key is set in options; key is parenthesized so combined flags such as (A | B) are tested as a whole, and is evaluated twice.
+/// @ingroup Utils
+#define FFX_CONTAINS_FLAG(options, key) (((options) & (key)) == (key))
+
+#if defined(FFX_MUTEX_IMPL_SHARED)
+/// Lock mutex exclusively.
+///
+/// @ingroup Utils
+#define FFX_MUTEX_LOCK(x) x.lock()
+/// Lock mutex for shared access (calls lock_shared() on x).
+///
+/// @ingroup Utils
+#define FFX_MUTEX_LOCK_SHARED(x) x.lock_shared()
+/// Unlock exclusive mutex lock.
+///
+/// @ingroup Utils
+#define FFX_MUTEX_UNLOCK(x) x.unlock()
+/// Unlock shared mutex lock (calls unlock_shared() on x).
+///
+/// @ingroup Utils
+#define FFX_MUTEX_UNLOCK_SHARED(x) x.unlock_shared()
+#elif defined(FFX_MUTEX_IMPL_STANDARD)
+/// Lock mutex exclusively.
+///
+/// @ingroup Utils
+#define FFX_MUTEX_LOCK(x) x.lock()
+/// Lock mutex for shared access (in this implementation it takes the exclusive lock).
+///
+/// @ingroup Utils
+#define FFX_MUTEX_LOCK_SHARED(x) FFX_MUTEX_LOCK(x)
+/// Unlock exclusive mutex lock.
+///
+/// @ingroup Utils
+#define FFX_MUTEX_UNLOCK(x) x.unlock()
+/// Unlock shared mutex lock (in this implementation it releases the exclusive lock).
+///
+/// @ingroup Utils
+#define FFX_MUTEX_UNLOCK_SHARED(x) FFX_MUTEX_UNLOCK(x)
+#elif !defined(FFX_MUTEX_LOCK) || !defined(FFX_MUTEX_LOCK_SHARED) || !defined(FFX_MUTEX_UNLOCK) || !defined(FFX_MUTEX_UNLOCK_SHARED)
+#error When using custom mutex you have to provide all following operations too: FFX_MUTEX_LOCK, FFX_MUTEX_LOCK_SHARED, FFX_MUTEX_UNLOCK, FFX_MUTEX_UNLOCK_SHARED!
+#endif // #if defined(FFX_MUTEX_IMPL_SHARED)
+
+/// Computes the number of bits set to 1 in an integer.
+///
+/// @param [in] val Integer mask.
+///
+/// @return Number of bits set to 1 in provided val.
+///
+/// @ingroup Utils
+inline uint8_t ffxCountBitsSet(uint32_t val) noexcept
+{
+#if __cplusplus >= 202002L // C++20 and later: standard library bit count.
+ return static_cast<uint8_t>(std::popcount(val));
+#elif defined(_MSVC_LANG) // MSVC intrinsic.
+ return static_cast<uint8_t>(__popcnt(val));
+#elif defined(__GNUC__) || defined(__clang__) // GCC / Clang builtin.
+ return static_cast<uint8_t>(__builtin_popcount(val));
+#else // Portable fallback: sum the bits in progressively wider fields (2, 4, 8, 16, 32 bits).
+ uint32_t c = val - ((val >> 1) & 0x55555555);
+ c = ((c >> 2) & 0x33333333) + (c & 0x33333333);
+ c = ((c >> 4) + c) & 0x0F0F0F0F;
+ c = ((c >> 8) + c) & 0x00FF00FF;
+ return static_cast<uint8_t>(((c >> 16) + c) & 0x0000FFFF);
+#endif
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_common_types.h b/thirdparty/amd-ffx/gpu/ffx_common_types.h
similarity index 76%
rename from thirdparty/amd-fsr2/shaders/ffx_common_types.h
rename to thirdparty/amd-ffx/gpu/ffx_common_types.h
index ddd17862b61a..2c4f0ba02dfd 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_common_types.h
+++ b/thirdparty/amd-ffx/gpu/ffx_common_types.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -18,6 +19,7 @@
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
+
#ifndef FFX_COMMON_TYPES_H
#define FFX_COMMON_TYPES_H
@@ -25,127 +27,237 @@
#define FFX_PARAMETER_IN
#define FFX_PARAMETER_OUT
#define FFX_PARAMETER_INOUT
+#define FFX_PARAMETER_UNIFORM
#elif defined(FFX_HLSL)
#define FFX_PARAMETER_IN in
#define FFX_PARAMETER_OUT out
#define FFX_PARAMETER_INOUT inout
+#define FFX_PARAMETER_UNIFORM uniform
#elif defined(FFX_GLSL)
#define FFX_PARAMETER_IN in
#define FFX_PARAMETER_OUT out
#define FFX_PARAMETER_INOUT inout
+#define FFX_PARAMETER_UNIFORM const //[cacao_placeholder] until a better fit is found!
#endif // #if defined(FFX_CPU)
#if defined(FFX_CPU)
/// A typedef for a boolean value.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef bool FfxBoolean;
/// A typedef for a unsigned 8bit integer.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef uint8_t FfxUInt8;
/// A typedef for a unsigned 16bit integer.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef uint16_t FfxUInt16;
/// A typedef for a unsigned 32bit integer.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef uint32_t FfxUInt32;
/// A typedef for a unsigned 64bit integer.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef uint64_t FfxUInt64;
/// A typedef for a signed 8bit integer.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef int8_t FfxInt8;
/// A typedef for a signed 16bit integer.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef int16_t FfxInt16;
/// A typedef for a signed 32bit integer.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef int32_t FfxInt32;
/// A typedef for a signed 64bit integer.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef int64_t FfxInt64;
/// A typedef for a floating point value.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef float FfxFloat32;
/// A typedef for a 2-dimensional floating point value.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef float FfxFloat32x2[2];
/// A typedef for a 3-dimensional floating point value.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef float FfxFloat32x3[3];
/// A typedef for a 4-dimensional floating point value.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef float FfxFloat32x4[4];
-/// A typedef for a 2-dimensional 32bit unsigned integer.
+/// A typedef for a 2x2 floating point matrix.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x2x2[4];
+
+/// A typedef for a 3x3 floating point matrix.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x3x3[9];
+
+/// A typedef for a 3x4 floating point matrix.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x3x4[12];
+
+/// A typedef for a 4x4 floating point matrix.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
+typedef float FfxFloat32x4x4[16];
+
+/// A typedef for a 2-dimensional 32bit signed integer.
+///
+/// @ingroup CPUTypes
+typedef int32_t FfxInt32x2[2];
+
+/// A typedef for a 3-dimensional 32bit signed integer.
+///
+/// @ingroup CPUTypes
+typedef int32_t FfxInt32x3[3];
+
+/// A typedef for a 4-dimensional 32bit signed integer.
+///
+/// @ingroup CPUTypes
+typedef int32_t FfxInt32x4[4];
+
+/// A typedef for a 2-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPUTypes
typedef uint32_t FfxUInt32x2[2];
/// A typedef for a 3-dimensional 32bit unsigned integer.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef uint32_t FfxUInt32x3[3];
/// A typedef for a 4-dimensional 32bit unsigned integer.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
typedef uint32_t FfxUInt32x4[4];
#endif // #if defined(FFX_CPU)
#if defined(FFX_HLSL)
+
+#define FfxFloat32Mat4 matrix <float, 4, 4>
+#define FfxFloat32Mat3 matrix <float, 3, 3>
+
/// A typedef for a boolean value.
///
-/// @ingroup GPU
+/// @ingroup HLSLTypes
typedef bool FfxBoolean;
-#if FFX_HLSL_6_2
+#if FFX_HLSL_SM>=62
+
+/// @defgroup HLSL62Types HLSL 6.2 And Above Types
+/// HLSL 6.2 and above type defines for all commonly used variables
+///
+/// @ingroup HLSLTypes
+
+/// A typedef for a floating point value.
+///
+/// @ingroup HLSL62Types
typedef float32_t FfxFloat32;
+
+/// A typedef for a 2-dimensional floating point value.
+///
+/// @ingroup HLSL62Types
typedef float32_t2 FfxFloat32x2;
+
+/// A typedef for a 3-dimensional floating point value.
+///
+/// @ingroup HLSL62Types
typedef float32_t3 FfxFloat32x3;
+
+/// A typedef for a 4-dimensional floating point value.
+///
+/// @ingroup HLSL62Types
typedef float32_t4 FfxFloat32x4;
+/// A [cacao_placeholder] typedef for matrix type until confirmed.
+typedef float4x4 FfxFloat32x4x4;
+typedef float3x4 FfxFloat32x3x4;
+typedef float3x3 FfxFloat32x3x3;
+typedef float2x2 FfxFloat32x2x2;
+
/// A typedef for a unsigned 32bit integer.
///
-/// @ingroup GPU
+/// @ingroup HLSL62Types
typedef uint32_t FfxUInt32;
+
+/// A typedef for a 2-dimensional 32bit unsigned integer.
+///
+/// @ingroup HLSL62Types
typedef uint32_t2 FfxUInt32x2;
+
+/// A typedef for a 3-dimensional 32bit unsigned integer.
+///
+/// @ingroup HLSL62Types
typedef uint32_t3 FfxUInt32x3;
+
+/// A typedef for a 4-dimensional 32bit unsigned integer.
+///
+/// @ingroup HLSL62Types
typedef uint32_t4 FfxUInt32x4;
+
+/// A typedef for a signed 32bit integer.
+///
+/// @ingroup HLSL62Types
typedef int32_t FfxInt32;
+
+/// A typedef for a 2-dimensional signed 32bit integer.
+///
+/// @ingroup HLSL62Types
typedef int32_t2 FfxInt32x2;
+
+/// A typedef for a 3-dimensional signed 32bit integer.
+///
+/// @ingroup HLSL62Types
typedef int32_t3 FfxInt32x3;
+
+/// A typedef for a 4-dimensional signed 32bit integer.
+///
+/// @ingroup HLSL62Types
typedef int32_t4 FfxInt32x4;
-#else
+
+#else // #if FFX_HLSL_SM>=62
+
+/// @defgroup HLSLBaseTypes HLSL 6.1 And Below Types
+/// HLSL 6.1 and below type defines for all commonly used variables
+///
+/// @ingroup HLSLTypes
+
#define FfxFloat32 float
#define FfxFloat32x2 float2
#define FfxFloat32x3 float3
#define FfxFloat32x4 float4
+/// A [cacao_placeholder] typedef for matrix type until confirmed.
+#define FfxFloat32x4x4 float4x4
+#define FfxFloat32x3x4 float3x4
+#define FfxFloat32x3x3 float3x3
+#define FfxFloat32x2x2 float2x2
+
/// A typedef for a unsigned 32bit integer.
///
/// @ingroup GPU
@@ -153,14 +265,18 @@ typedef uint FfxUInt32;
typedef uint2 FfxUInt32x2;
typedef uint3 FfxUInt32x3;
typedef uint4 FfxUInt32x4;
+
typedef int FfxInt32;
typedef int2 FfxInt32x2;
typedef int3 FfxInt32x3;
typedef int4 FfxInt32x4;
-#endif // #if defined(FFX_HLSL_6_2)
+
+#endif // #if FFX_HLSL_SM>=62
#if FFX_HALF
-#if FFX_HLSL_6_2
+
+#if FFX_HLSL_SM >= 62
+
typedef float16_t FfxFloat16;
typedef float16_t2 FfxFloat16x2;
typedef float16_t3 FfxFloat16x3;
@@ -168,7 +284,7 @@ typedef float16_t4 FfxFloat16x4;
/// A typedef for an unsigned 16bit integer.
///
-/// @ingroup GPU
+/// @ingroup HLSLTypes
typedef uint16_t FfxUInt16;
typedef uint16_t2 FfxUInt16x2;
typedef uint16_t3 FfxUInt16x3;
@@ -176,12 +292,12 @@ typedef uint16_t4 FfxUInt16x4;
/// A typedef for a signed 16bit integer.
///
-/// @ingroup GPU
+/// @ingroup HLSLTypes
typedef int16_t FfxInt16;
typedef int16_t2 FfxInt16x2;
typedef int16_t3 FfxInt16x3;
typedef int16_t4 FfxInt16x4;
-#else
+#else // #if FFX_HLSL_SM>=62
typedef min16float FfxFloat16;
typedef min16float2 FfxFloat16x2;
typedef min16float3 FfxFloat16x3;
@@ -189,7 +305,7 @@ typedef min16float4 FfxFloat16x4;
/// A typedef for an unsigned 16bit integer.
///
-/// @ingroup GPU
+/// @ingroup HLSLTypes
typedef min16uint FfxUInt16;
typedef min16uint2 FfxUInt16x2;
typedef min16uint3 FfxUInt16x3;
@@ -197,19 +313,25 @@ typedef min16uint4 FfxUInt16x4;
/// A typedef for a signed 16bit integer.
///
-/// @ingroup GPU
+/// @ingroup HLSLTypes
typedef min16int FfxInt16;
typedef min16int2 FfxInt16x2;
typedef min16int3 FfxInt16x3;
typedef min16int4 FfxInt16x4;
-#endif // FFX_HLSL_6_2
+#endif // #if FFX_HLSL_SM>=62
+
#endif // FFX_HALF
+
#endif // #if defined(FFX_HLSL)
#if defined(FFX_GLSL)
+
+#define FfxFloat32Mat4 mat4
+#define FfxFloat32Mat3 mat3
+
/// A typedef for a boolean value.
///
-/// @ingroup GPU
+/// @ingroup GLSLTypes
#define FfxBoolean bool
#define FfxFloat32 float
#define FfxFloat32x2 vec2
@@ -223,6 +345,13 @@ typedef min16int4 FfxInt16x4;
#define FfxInt32x2 ivec2
#define FfxInt32x3 ivec3
#define FfxInt32x4 ivec4
+
+/// A [cacao_placeholder] typedef for matrix type until confirmed.
+#define FfxFloat32x4x4 mat4
+#define FfxFloat32x3x4 mat4x3
+#define FfxFloat32x3x3 mat3
+#define FfxFloat32x2x2 mat2
+
#if FFX_HALF
#define FfxFloat16 float16_t
#define FfxFloat16x2 f16vec2
@@ -241,11 +370,11 @@ typedef min16int4 FfxInt16x4;
// Global toggles:
// #define FFX_HALF (1)
-// #define FFX_HLSL_6_2 (1)
+// #define FFX_HLSL_SM (62)
#if FFX_HALF
-#if FFX_HLSL_6_2
+#if FFX_HLSL_SM >= 62
#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName;
#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName;
@@ -255,7 +384,7 @@ typedef min16int4 FfxInt16x4;
#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName;
#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName;
-#else //FFX_HLSL_6_2
+#else //FFX_HLSL_SM>=62
#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef min16##BaseComponentType TypeName;
#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName;
@@ -265,7 +394,7 @@ typedef min16int4 FfxInt16x4;
#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL );
#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL );
-#endif //FFX_HLSL_6_2
+#endif //FFX_HLSL_SM>=62
#else //FFX_HALF
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core.h b/thirdparty/amd-ffx/gpu/ffx_core.h
similarity index 61%
rename from thirdparty/amd-fsr2/shaders/ffx_core.h
rename to thirdparty/amd-ffx/gpu/ffx_core.h
index 4e687d6e3d6b..d1ed14419b78 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_core.h
+++ b/thirdparty/amd-ffx/gpu/ffx_core.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -19,17 +20,39 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
-/// @defgroup Core
-/// @defgroup HLSL
-/// @defgroup GLSL
-/// @defgroup GPU
-/// @defgroup CPU
-/// @defgroup CAS
-/// @defgroup FSR1
+/// @defgroup FfxGPU GPU
+/// The FidelityFX SDK GPU References
+///
+/// @ingroup ffxSDK
+
+/// @defgroup FfxHLSL HLSL References
+/// FidelityFX SDK HLSL GPU References
+///
+/// @ingroup FfxGPU
+
+/// @defgroup FfxGLSL GLSL References
+/// FidelityFX SDK GLSL GPU References
+///
+/// @ingroup FfxGPU
+
+/// @defgroup FfxGPUEffects FidelityFX GPU References
+/// FidelityFX Effect GPU Reference Documentation
+///
+/// @ingroup FfxGPU
+
+/// @defgroup GPUCore GPU Core
+/// GPU defines and functions
+///
+/// @ingroup FfxGPU
#if !defined(FFX_CORE_H)
#define FFX_CORE_H
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic push
+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
+#endif //__hlsl_dx_compiler
+
#include "ffx_common_types.h"
#if defined(FFX_CPU)
@@ -49,4 +72,9 @@
#include "ffx_core_gpu_common_half.h"
#include "ffx_core_portability.h"
#endif // #if defined(FFX_GPU)
-#endif // #if !defined(FFX_CORE_H)
\ No newline at end of file
+
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic pop
+#endif //__hlsl_dx_compiler
+
+#endif // #if !defined(FFX_CORE_H)
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_cpu.h b/thirdparty/amd-ffx/gpu/ffx_core_cpu.h
similarity index 89%
rename from thirdparty/amd-fsr2/shaders/ffx_core_cpu.h
rename to thirdparty/amd-ffx/gpu/ffx_core_cpu.h
index 3bf0295bfc62..4b6c41aa5b28 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_core_cpu.h
+++ b/thirdparty/amd-ffx/gpu/ffx_core_cpu.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -21,21 +22,26 @@
/// A define for a true value in a boolean expression.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
#define FFX_TRUE (1)
/// A define for a false value in a boolean expression.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
#define FFX_FALSE (0)
#if !defined(FFX_STATIC)
/// A define to abstract declaration of static variables and functions.
///
-/// @ingroup CPU
+/// @ingroup CPUTypes
#define FFX_STATIC static
#endif // #if !defined(FFX_STATIC)
+/// @defgroup CPUCore CPU Core
+/// Core CPU-side defines and functions
+///
+/// @ingroup ffxHost
+
#ifdef __clang__
#pragma clang diagnostic ignored "-Wunused-variable"
#endif
@@ -47,7 +53,7 @@
/// @returns
/// An unsigned 32bit integer value containing the bit pattern of x.
///
-/// @ingroup CPU
+/// @ingroup CPUCore
FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x)
{
union
@@ -89,7 +95,7 @@ FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup CPU
+/// @ingroup CPUCore
FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
{
return y * t + (-x * t + x);
@@ -102,10 +108,10 @@ FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
/// @returns
/// The reciprocal value of x.
///
-/// @ingroup CPU
-FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a)
+/// @ingroup CPUCore
+FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 x)
{
- return 1.0f / a;
+ return 1.0f / x;
}
/// Compute the square root of a value.
@@ -115,13 +121,13 @@ FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a)
/// @returns
/// The the square root of x.
///
-/// @ingroup CPU
+/// @ingroup CPUCore
FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x)
{
- return sqrt(x);
+ return FfxFloat32(sqrt(x));
}
-FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
+FFX_STATIC FfxUInt32 ffxAShrSU1(FfxUInt32 a, FfxUInt32 b)
{
return FfxUInt32(FfxInt32(a) >> FfxInt32(b));
}
@@ -135,10 +141,10 @@ FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
/// @returns
/// The fractional part of x.
///
-/// @ingroup CPU
-FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a)
+/// @ingroup CPUCore
+FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 x)
{
- return a - floor(a);
+ return x - FfxFloat32(floor(x));
}
/// Compute the reciprocal square root of a value.
@@ -148,10 +154,10 @@ FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a)
/// @returns
/// The reciprocal square root value of x.
///
-/// @ingroup CPU
-FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 a)
+/// @ingroup CPUCore
+FFX_STATIC FfxFloat32 ffxRsqrt(FfxFloat32 x)
{
- return ffxReciprocal(ffxSqrt(a));
+ return ffxReciprocal(ffxSqrt(x));
}
FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
@@ -181,16 +187,16 @@ FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
/// @returns
/// The clamped version of x.
///
-/// @ingroup CPU
-FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 a)
+/// @ingroup CPUCore
+FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 x)
{
- return ffxMin(1.0f, ffxMax(0.0f, a));
+ return ffxMin(1.0f, ffxMax(0.0f, x));
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+FFX_STATIC void ffxOpAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
d[0] = a[0] + b;
d[1] = a[1] + b;
@@ -198,7 +204,7 @@ FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
return;
}
-FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
+FFX_STATIC void ffxOpACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
d[0] = a[0];
d[1] = a[1];
@@ -206,7 +212,7 @@ FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
return;
}
-FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
+FFX_STATIC void ffxOpAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
{
d[0] = a[0] * b[0];
d[1] = a[1] * b[1];
@@ -214,7 +220,7 @@ FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
return;
}
-FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+FFX_STATIC void ffxOpAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
d[0] = a[0] * b;
d[1] = a[1] * b;
@@ -222,7 +228,7 @@ FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
return;
}
-FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
+FFX_STATIC void ffxOpARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
d[0] = ffxReciprocal(a[0]);
d[1] = ffxReciprocal(a[1]);
@@ -245,8 +251,8 @@ FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
/// @returns
/// The closest 16bit floating point value to f.
///
-/// @ingroup CPU
-FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f)
+/// @ingroup CPUCore
+FFX_STATIC FfxUInt32 ffxF32ToF16(FfxFloat32 f)
{
static FfxUInt16 base[512] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@@ -320,13 +326,13 @@ FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f)
/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the
/// 32bit unsigned integer respectively.
///
-/// @param [in] value A 2-dimensional floating point value to convert and pack.
+/// @param [in] x A 2-dimensional floating point value to convert and pack.
///
/// @returns
/// A packed 32bit value containing 2 16bit floating point values.
///
-/// @ingroup CPU
-FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 a)
+/// @ingroup CPUCore
+FFX_STATIC FfxUInt32 ffxPackHalf2x16(FfxFloat32x2 x)
{
- return f32tof16(a[0]) + (f32tof16(a[1]) << 16);
+ return ffxF32ToF16(x[0]) + (ffxF32ToF16(x[1]) << 16);
}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h b/thirdparty/amd-ffx/gpu/ffx_core_glsl.h
similarity index 81%
rename from thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
rename to thirdparty/amd-ffx/gpu/ffx_core_glsl.h
index 6ec58f3c625c..c8dccacdbc44 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
+++ b/thirdparty/amd-ffx/gpu/ffx_core_glsl.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -19,180 +20,230 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
+/// @defgroup GLSLCore GLSL Core
+/// GLSL core defines and functions
+///
+/// @ingroup FfxGLSL
+
+/// A define for abstracting select functionality (pre/post HLSL 2021) between shading languages; in GLSL it expands to a fully parenthesized ternary.
+///
+/// @ingroup GLSLCore
+#define FFX_SELECT(cond, arg1, arg2) ((cond) ? (arg1) : (arg2))
+
/// A define for abstracting shared memory between shading languages.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_GROUPSHARED shared
/// A define for abstracting compute memory barriers between shading languages.
///
-/// @ingroup GPU
-#define FFX_GROUP_MEMORY_BARRIER() barrier()
+/// @ingroup GLSLCore
+#define FFX_GROUP_MEMORY_BARRIER groupMemoryBarrier(); barrier()
+
+/// A define for abstracting compute atomic additions between shading languages.
+///
+/// @ingroup GLSLCore
+#define FFX_ATOMIC_ADD(x, y) atomicAdd(x, y)
+
+/// A define for abstracting compute atomic additions that also capture the returned value (in r) between shading languages.
+///
+/// @ingroup GLSLCore
+#define FFX_ATOMIC_ADD_RETURN(x, y, r) r = atomicAdd(x, y)
+
+/// A define for abstracting compute atomic OR between shading languages.
+///
+/// @ingroup GLSLCore
+#define FFX_ATOMIC_OR(x, y) atomicOr(x, y)
+
+/// A define for abstracting compute atomic min between shading languages.
+///
+/// @ingroup GLSLCore
+#define FFX_ATOMIC_MIN(x, y) atomicMin(x, y)
+
+/// A define for abstracting compute atomic max between shading languages.
+///
+/// @ingroup GLSLCore
+#define FFX_ATOMIC_MAX(x, y) atomicMax(x, y)
/// A define added to accept static markup on functions to aid CPU/GPU portability of code.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_STATIC
/// A define for abstracting loop unrolling between shading languages.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_UNROLL
/// A define for abstracting a 'greater than' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_GREATER_THAN(x, y) greaterThan(x, y)
/// A define for abstracting a 'greater than or equal' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_GREATER_THAN_EQUAL(x, y) greaterThanEqual(x, y)
/// A define for abstracting a 'less than' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_LESS_THAN(x, y) lessThan(x, y)
/// A define for abstracting a 'less than or equal' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_LESS_THAN_EQUAL(x, y) lessThanEqual(x, y)
/// A define for abstracting an 'equal' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_EQUAL(x, y) equal(x, y)
/// A define for abstracting a 'not equal' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_NOT_EQUAL(x, y) notEqual(x, y)
+/// A define for abstracting matrix multiply operations between shading languages (arguments are parenthesized so compound expressions multiply as a whole).
+///
+/// @ingroup GLSLCore
+#define FFX_MATRIX_MULTIPLY(a, b) ((a) * (b))
+
+/// A define for abstracting vector transformations between shading languages (arguments are parenthesized so compound expressions multiply as a whole).
+///
+/// @ingroup GLSLCore
+#define FFX_TRANSFORM_VECTOR(a, b) ((a) * (b))
+
+/// A define for abstracting modulo operations between shading languages.
+///
+/// @ingroup GLSLCore
+#define FFX_MODULO(a, b) (mod(a, b))
+
/// Broadcast a scalar value to a 1-dimensional floating point vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x)
/// Broadcast a scalar value to a 2-dimensional floating point vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32x2(FfxFloat32(x))
/// Broadcast a scalar value to a 3-dimensional floating point vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32x3(FfxFloat32(x))
/// Broadcast a scalar value to a 4-dimensional floating point vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32x4(FfxFloat32(x))
/// Broadcast a scalar value to a 1-dimensional unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_UINT32(x) FfxUInt32(x)
/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_UINT32X2(x) FfxUInt32x2(FfxUInt32(x))
/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_UINT32X3(x) FfxUInt32x3(FfxUInt32(x))
/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_UINT32X4(x) FfxUInt32x4(FfxUInt32(x))
/// Broadcast a scalar value to a 1-dimensional signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_INT32(x) FfxInt32(x)
/// Broadcast a scalar value to a 2-dimensional signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_INT32X2(x) FfxInt32x2(FfxInt32(x))
/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_INT32X3(x) FfxInt32x3(FfxInt32(x))
/// Broadcast a scalar value to a 4-dimensional signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_INT32X4(x) FfxInt32x4(FfxInt32(x))
/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_FLOAT16(x) FFX_MIN16_F(x)
/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_FLOAT16X2(x) FFX_MIN16_F2(FFX_MIN16_F(x))
/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_FLOAT16X3(x) FFX_MIN16_F3(FFX_MIN16_F(x))
/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_FLOAT16X4(x) FFX_MIN16_F4(FFX_MIN16_F(x))
/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_UINT16(x) FFX_MIN16_U(x)
/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_UINT16X2(x) FFX_MIN16_U2(FFX_MIN16_U(x))
/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_UINT16X3(x) FFX_MIN16_U3(FFX_MIN16_U(x))
/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_UINT16X4(x) FFX_MIN16_U4(FFX_MIN16_U(x))
/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_INT16(x) FFX_MIN16_I(x)
/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_INT16X2(x) FFX_MIN16_I2(FFX_MIN16_I(x))
/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_INT16X3(x) FFX_MIN16_I3(FFX_MIN16_I(x))
/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup GLSLCore
#define FFX_BROADCAST_MIN_INT16X4(x) FFX_MIN16_I4(FFX_MIN16_I(x))
+ #extension GL_EXT_shader_explicit_arithmetic_types : require
#if !defined(FFX_SKIP_EXT)
#if FFX_HALF
#extension GL_EXT_shader_16bit_storage : require
- #extension GL_EXT_shader_explicit_arithmetic_types : require
#endif // FFX_HALF
#if defined(FFX_LONG)
@@ -216,12 +267,12 @@ FfxFloat32x4 ffxSqrt(FfxFloat32x4 x);
/// Interprets the bit pattern of x as a floating-point number.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32 ffxAsFloat(FfxUInt32 x)
{
return uintBitsToFloat(x);
@@ -229,12 +280,12 @@ FfxFloat32 ffxAsFloat(FfxUInt32 x)
/// Interprets the bit pattern of x as a floating-point number.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
{
return uintBitsToFloat(x);
@@ -242,12 +293,12 @@ FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
/// Interprets the bit pattern of x as a floating-point number.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
{
return uintBitsToFloat(x);
@@ -255,12 +306,12 @@ FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
/// Interprets the bit pattern of x as a floating-point number.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
{
return uintBitsToFloat(x);
@@ -268,12 +319,12 @@ FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
/// Interprets the bit pattern of x as an unsigned integer.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32 ffxAsUInt32(FfxFloat32 x)
{
return floatBitsToUint(x);
@@ -281,12 +332,12 @@ FfxUInt32 ffxAsUInt32(FfxFloat32 x)
/// Interprets the bit pattern of x as an unsigned integer.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
{
return floatBitsToUint(x);
@@ -294,12 +345,12 @@ FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
/// Interprets the bit pattern of x as an unsigned integer.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
{
return floatBitsToUint(x);
@@ -307,26 +358,43 @@ FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
/// Interprets the bit pattern of x as an unsigned integer.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
{
return floatBitsToUint(x);
}
+/// Pack 2x32-bit floating point values in a single 32bit value.
+///
+/// This function first converts each component of value into their nearest 16-bit floating
+/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the
+/// 32bit unsigned integer respectively.
+///
+/// @param [in] value A 2-dimensional floating point value to convert and pack.
+///
+/// @returns
+/// A packed 32bit value containing 2 16bit floating point values.
+///
+/// @ingroup GLSLCore
+FfxUInt32 ffxPackHalf2x16(FfxFloat32x2 value)
+{
+ return packHalf2x16(value);
+}
+
/// Convert a 32bit IEEE 754 floating point value to its nearest 16bit equivalent.
///
/// @param [in] value The value to convert.
-///
+///
/// @returns
/// The nearest 16bit equivalent of value.
-///
-/// @ingroup GLSL
-FfxUInt32 f32tof16(FfxFloat32 value)
+///
+/// @ingroup GLSLCore
+FfxUInt32 ffxF32ToF16(FfxFloat32 value)
{
return packHalf2x16(FfxFloat32x2(value, 0.0));
}
@@ -338,7 +406,7 @@ FfxUInt32 f32tof16(FfxFloat32 value)
/// @returns
/// A 2-dimensional floating point vector with value in each component.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
{
return FfxFloat32x2(value, value);
@@ -351,7 +419,7 @@ FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
/// @returns
/// A 3-dimensional floating point vector with value in each component.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
{
return FfxFloat32x3(value, value, value);
@@ -364,7 +432,7 @@ FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
/// @returns
/// A 4-dimensional floating point vector with value in each component.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
{
return FfxFloat32x4(value, value, value, value);
@@ -377,7 +445,7 @@ FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
/// @returns
/// A 2-dimensional signed integer vector with value in each component.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxInt32x2 ffxBroadcast2(FfxInt32 value)
{
return FfxInt32x2(value, value);
@@ -390,7 +458,7 @@ FfxInt32x2 ffxBroadcast2(FfxInt32 value)
/// @returns
/// A 3-dimensional signed integer vector with value in each component.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxInt32x3 ffxBroadcast3(FfxInt32 value)
{
return FfxInt32x3(value, value, value);
@@ -403,7 +471,7 @@ FfxInt32x3 ffxBroadcast3(FfxInt32 value)
/// @returns
/// A 4-dimensional signed integer vector with value in each component.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxInt32x4 ffxBroadcast4(FfxInt32 value)
{
return FfxInt32x4(value, value, value, value);
@@ -416,7 +484,7 @@ FfxInt32x4 ffxBroadcast4(FfxInt32 value)
/// @returns
/// A 2-dimensional unsigned integer vector with value in each component.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
{
return FfxUInt32x2(value, value);
@@ -429,7 +497,7 @@ FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
/// @returns
/// A 3-dimensional unsigned integer vector with value in each component.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
{
return FfxUInt32x3(value, value, value);
@@ -442,7 +510,7 @@ FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
/// @returns
/// A 4-dimensional unsigned integer vector with value in each component.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
{
return FfxUInt32x4(value, value, value, value);
@@ -450,16 +518,16 @@ FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
///
///
-/// @ingroup GLSL
-FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
+/// @ingroup GLSLCore
+FfxUInt32 ffxBitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
{
return bitfieldExtract(src, FfxInt32(off), FfxInt32(bits));
}
///
///
-/// @ingroup GLSL
-FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
+/// @ingroup GLSLCore
+FfxUInt32 ffxBitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
{
return (ins & mask) | (src & (~mask));
}
@@ -467,8 +535,8 @@ FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
// Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<x and y according to t.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
{
return mix(x, y, t);
@@ -507,7 +575,7 @@ FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
{
return mix(x, y, t);
@@ -527,7 +595,7 @@ FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
{
return mix(x, y, t);
@@ -547,7 +615,7 @@ FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
{
return mix(x, y, t);
@@ -567,7 +635,7 @@ FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
{
return mix(x, y, t);
@@ -587,7 +655,7 @@ FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
{
return mix(x, y, t);
@@ -607,7 +675,7 @@ FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
{
return mix(x, y, t);
@@ -625,7 +693,7 @@ FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
return max(x, max(y, z));
@@ -643,7 +711,7 @@ FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
return max(x, max(y, z));
@@ -661,7 +729,7 @@ FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
return max(x, max(y, z));
@@ -679,7 +747,7 @@ FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
return max(x, max(y, z));
@@ -697,7 +765,7 @@ FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
return max(x, max(y, z));
@@ -715,7 +783,7 @@ FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
return max(x, max(y, z));
@@ -733,7 +801,7 @@ FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
return max(x, max(y, z));
@@ -751,7 +819,7 @@ FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
return max(x, max(y, z));
@@ -769,7 +837,7 @@ FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -787,7 +855,7 @@ FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -805,7 +873,7 @@ FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -823,7 +891,7 @@ FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -841,7 +909,7 @@ FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -859,7 +927,7 @@ FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -877,7 +945,7 @@ FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -895,7 +963,7 @@ FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -914,7 +982,7 @@ FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
return min(x, min(y, z));
@@ -932,7 +1000,7 @@ FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
return min(x, min(y, z));
@@ -950,7 +1018,7 @@ FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
return min(x, min(y, z));
@@ -968,7 +1036,7 @@ FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
return min(x, min(y, z));
@@ -986,7 +1054,7 @@ FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
return min(x, min(y, z));
@@ -1004,7 +1072,7 @@ FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
return min(x, min(y, z));
@@ -1022,7 +1090,7 @@ FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
return min(x, min(y, z));
@@ -1040,7 +1108,7 @@ FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
return min(x, min(y, z));
@@ -1054,9 +1122,9 @@ FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
///
/// @returns
/// The reciprocal value of x.
-///
-/// @ingroup GLSL
-FfxFloat32 rcp(FfxFloat32 x)
+///
+/// @ingroup GLSLCore
+FfxFloat32 ffxReciprocal(FfxFloat32 x)
{
return FfxFloat32(1.0) / x;
}
@@ -1070,8 +1138,8 @@ FfxFloat32 rcp(FfxFloat32 x)
/// @returns
/// The reciprocal value of x.
///
-/// @ingroup GLSL
-FfxFloat32x2 rcp(FfxFloat32x2 x)
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxReciprocal(FfxFloat32x2 x)
{
return ffxBroadcast2(1.0) / x;
}
@@ -1085,8 +1153,8 @@ FfxFloat32x2 rcp(FfxFloat32x2 x)
/// @returns
/// The reciprocal value of x.
///
-/// @ingroup GLSL
-FfxFloat32x3 rcp(FfxFloat32x3 x)
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxReciprocal(FfxFloat32x3 x)
{
return ffxBroadcast3(1.0) / x;
}
@@ -1100,8 +1168,8 @@ FfxFloat32x3 rcp(FfxFloat32x3 x)
/// @returns
/// The reciprocal value of x.
///
-/// @ingroup GLSL
-FfxFloat32x4 rcp(FfxFloat32x4 x)
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxReciprocal(FfxFloat32x4 x)
{
return ffxBroadcast4(1.0) / x;
}
@@ -1115,8 +1183,8 @@ FfxFloat32x4 rcp(FfxFloat32x4 x)
/// @returns
/// The reciprocal square root value of x.
///
-/// @ingroup GLSL
-FfxFloat32 rsqrt(FfxFloat32 x)
+/// @ingroup GLSLCore
+FfxFloat32 ffxRsqrt(FfxFloat32 x)
{
return FfxFloat32(1.0) / ffxSqrt(x);
}
@@ -1130,8 +1198,8 @@ FfxFloat32 rsqrt(FfxFloat32 x)
/// @returns
/// The reciprocal square root value of x.
///
-/// @ingroup GLSL
-FfxFloat32x2 rsqrt(FfxFloat32x2 x)
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxRsqrt(FfxFloat32x2 x)
{
return ffxBroadcast2(1.0) / ffxSqrt(x);
}
@@ -1145,8 +1213,8 @@ FfxFloat32x2 rsqrt(FfxFloat32x2 x)
/// @returns
/// The reciprocal square root value of x.
///
-/// @ingroup GLSL
-FfxFloat32x3 rsqrt(FfxFloat32x3 x)
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxRsqrt(FfxFloat32x3 x)
{
return ffxBroadcast3(1.0) / ffxSqrt(x);
}
@@ -1160,7 +1228,7 @@ FfxFloat32x3 rsqrt(FfxFloat32x3 x)
/// @returns
/// The reciprocal square root value of x.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x4 rsqrt(FfxFloat32x4 x)
{
return ffxBroadcast4(1.0) / ffxSqrt(x);
@@ -1173,7 +1241,7 @@ FfxFloat32x4 rsqrt(FfxFloat32x4 x)
/// @returns
/// The clamped version of x.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32 ffxSaturate(FfxFloat32 x)
{
return clamp(x, FfxFloat32(0.0), FfxFloat32(1.0));
@@ -1186,7 +1254,7 @@ FfxFloat32 ffxSaturate(FfxFloat32 x)
/// @returns
/// The clamped version of x.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
{
return clamp(x, ffxBroadcast2(0.0), ffxBroadcast2(1.0));
@@ -1199,7 +1267,7 @@ FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
/// @returns
/// The clamped version of x.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
{
return clamp(x, ffxBroadcast3(0.0), ffxBroadcast3(1.0));
@@ -1212,7 +1280,7 @@ FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
/// @returns
/// The clamped version of x.
///
-/// @ingroup GLSL
+/// @ingroup GLSLCore
FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
{
return clamp(x, ffxBroadcast4(0.0), ffxBroadcast4(1.0));
@@ -1231,7 +1299,7 @@ FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
/// @returns
/// The fractional part of x.
///
-/// @ingroup HLSL
+/// @ingroup GLSLCore
FfxFloat32 ffxFract(FfxFloat32 x)
{
return fract(x);
@@ -1250,7 +1318,7 @@ FfxFloat32 ffxFract(FfxFloat32 x)
/// @returns
/// The fractional part of x.
///
-/// @ingroup HLSL
+/// @ingroup GLSLCore
FfxFloat32x2 ffxFract(FfxFloat32x2 x)
{
return fract(x);
@@ -1269,7 +1337,7 @@ FfxFloat32x2 ffxFract(FfxFloat32x2 x)
/// @returns
/// The fractional part of x.
///
-/// @ingroup HLSL
+/// @ingroup GLSLCore
FfxFloat32x3 ffxFract(FfxFloat32x3 x)
{
return fract(x);
@@ -1288,21 +1356,131 @@ FfxFloat32x3 ffxFract(FfxFloat32x3 x)
/// @returns
/// The fractional part of x.
///
-/// @ingroup HLSL
+/// @ingroup GLSLCore
FfxFloat32x4 ffxFract(FfxFloat32x4 x)
{
return fract(x);
}
-FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
+/// Rounds x to the nearest integer. When the fractional part is exactly 0.5, rounds to the nearest even integer.
+///
+/// @param [in] x The value to be rounded.
+///
+/// @returns
+/// The integer nearest to x, or the nearest even integer when x is equidistant from two integers.
+///
+/// @ingroup GLSLCore
+FfxFloat32 ffxRound(FfxFloat32 x)
+{
+ return roundEven(x);
+}
+
+/// Rounds each component of x to the nearest integer. When a fractional part is exactly 0.5, rounds to the nearest even integer.
+///
+/// @param [in] x The value to be rounded.
+///
+/// @returns
+/// Per component: the integer nearest to x, or the nearest even integer when equidistant from two integers.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxRound(FfxFloat32x2 x)
+{
+ return roundEven(x);
+}
+
+/// Rounds each component of x to the nearest integer. When a fractional part is exactly 0.5, rounds to the nearest even integer.
+///
+/// @param [in] x The value to be rounded.
+///
+/// @returns
+/// Per component: the integer nearest to x, or the nearest even integer when equidistant from two integers.
+///
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxRound(FfxFloat32x3 x)
+{
+ return roundEven(x);
+}
+
+/// Rounds each component of x to the nearest integer. When a fractional part is exactly 0.5, rounds to the nearest even integer.
+///
+/// @param [in] x The value to be rounded.
+///
+/// @returns
+/// Per component: the integer nearest to x, or the nearest even integer when equidistant from two integers.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxRound(FfxFloat32x4 x)
+{
+ return roundEven(x);
+}
+
+FfxUInt32 ffxAShrSU1(FfxUInt32 a, FfxUInt32 b) // Right-shifts a by b on its signed conversion (so the sign bit is replicated), then converts back to unsigned.
{
return FfxUInt32(FfxInt32(a) >> FfxInt32(b));
}
-#if FFX_HALF
+FfxUInt32 ffxPackF32(FfxFloat32x2 v){ // Packs both components of v into one 32-bit word; packHalf2x16 stores each as a 16-bit float, so precision is reduced.
+ return packHalf2x16(v);
+}
+FfxFloat32x2 ffxUnpackF32(FfxUInt32 u){ // Inverse of ffxPackF32: expands the two 16-bit floats stored in u back to 32-bit floats.
+ return unpackHalf2x16(u);
+}
+
+FfxUInt32x2 ffxPackF32x2(FfxFloat32x4 v){ // Packs v.xy into .x and v.zw into .y of the result, each pair as two 16-bit floats.
+ return FfxUInt32x2(ffxPackF32(v.xy), ffxPackF32(v.zw));
+}
+
+FfxFloat32x4 ffxUnpackF32x2(FfxUInt32x2 a){ // Inverse of ffxPackF32x2: a.x expands to .xy and a.y expands to .zw of the result.
+ return FfxFloat32x4(ffxUnpackF32(a.x), ffxUnpackF32(a.y));
+}
+
+/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned.
+/// @param v Value to invert.
+/// @return If v = 0 returns 0 (computed as 0 / -1). If v != 0 returns 1/v.
+FfxFloat32 ffxInvertSafe(FfxFloat32 v){
+ FfxFloat32 s = sign(v); // -1, 0 or +1
+ FfxFloat32 s2 = s*s; // 1 when v != 0, 0 when v == 0
+ return s2/(v + (s2 - 1.0)); // (s2 - 1.0) is grouped first: (v + 1.0) - 1.0 absorbs a tiny |v| and leaves a zero denominator.
+}
+
+/// @brief Inverts each component while avoiding division by zero. Components that are zero yield zero.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0 (computed as 0 / -1). If v != 0 returns 1/v.
+FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){
+ FfxFloat32x2 s = sign(v); // -1, 0 or +1 per component
+ FfxFloat32x2 s2 = s*s; // 1 where v != 0, 0 where v == 0
+ return s2/(v + (s2 - FfxFloat32x2(1.0, 1.0))); // (s2 - 1) is grouped first: (v + 1) - 1 absorbs a tiny |v| and leaves a zero denominator.
+}
+
+/// @brief Inverts each component while avoiding division by zero. Components that are zero yield zero.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0 (computed as 0 / -1). If v != 0 returns 1/v.
+FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){
+ FfxFloat32x3 s = sign(v); // -1, 0 or +1 per component
+ FfxFloat32x3 s2 = s*s; // 1 where v != 0, 0 where v == 0
+ return s2/(v + (s2 - FfxFloat32x3(1.0, 1.0, 1.0))); // (s2 - 1) is grouped first: (v + 1) - 1 absorbs a tiny |v| and leaves a zero denominator.
+}
+
+/// @brief Inverts each component while avoiding division by zero. Components that are zero yield zero.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0 (computed as 0 / -1). If v != 0 returns 1/v.
+FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){
+ FfxFloat32x4 s = sign(v); // -1, 0 or +1 per component
+ FfxFloat32x4 s2 = s*s; // 1 where v != 0, 0 where v == 0
+ return s2/(v + (s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0))); // (s2 - 1) is grouped first: (v + 1) - 1 absorbs a tiny |v| and leaves a zero denominator.
+}
+#if FFX_HALF
#define FFX_UINT32_TO_FLOAT16X2(x) unpackFloat2x16(FfxUInt32(x))
+FfxUInt32 ffxPackF16(FfxFloat16x2 v){ // Packs the two half-precision components of v into one 32-bit word via packHalf2x16.
+ return packHalf2x16(v);
+}
+
+FfxFloat16x2 ffxUnpackF16(FfxUInt32 u){ // Inverse of ffxPackF16: unpacks the two 16-bit floats stored in u and converts the result to FfxFloat16x2.
+ return FfxFloat16x2(unpackHalf2x16(u));
+}
+
FfxFloat16x4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
{
return FfxFloat16x4(unpackFloat2x16(x.x), unpackFloat2x16(x.y));
@@ -1473,40 +1651,6 @@ FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 a)
return mix(x, y, a);
}
//------------------------------------------------------------------------------------------------------------------------------
-// No packed version of ffxMid3.
-FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
-{
- return max(min(x, y), min(max(x, y), z));
-}
-FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
-{
- return max(min(x, y), min(max(x, y), z));
-}
-FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
-{
- return max(min(x, y), min(max(x, y), z));
-}
-FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
-{
- return max(min(x, y), min(max(x, y), z));
-}
-FfxInt16 ffxMed3Half(FfxInt16 x, FfxInt16 y, FfxInt16 z)
-{
- return max(min(x, y), min(max(x, y), z));
-}
-FfxInt16x2 ffxMed3Half(FfxInt16x2 x, FfxInt16x2 y, FfxInt16x2 z)
-{
- return max(min(x, y), min(max(x, y), z));
-}
-FfxInt16x3 ffxMed3Half(FfxInt16x3 x, FfxInt16x3 y, FfxInt16x3 z)
-{
- return max(min(x, y), min(max(x, y), z));
-}
-FfxInt16x4 ffxMed3Half(FfxInt16x4 x, FfxInt16x4 y, FfxInt16x4 z)
-{
- return max(min(x, y), min(max(x, y), z));
-}
-//------------------------------------------------------------------------------------------------------------------------------
// No packed version of ffxMax3.
FfxFloat16 ffxMax3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
{
@@ -1543,6 +1687,23 @@ FfxFloat16x4 ffxMin3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
return min(x, min(y, z));
}
//------------------------------------------------------------------------------------------------------------------------------
+FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) // Median (middle value) of x, y and z.
+{
+ return max(min(x, y), min(max(x, y), z));
+}
+FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) // Component-wise median (middle value) of x, y and z.
+{
+ return max(min(x, y), min(max(x, y), z));
+}
+FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) // Component-wise median (middle value) of x, y and z.
+{
+ return max(min(x, y), min(max(x, y), z));
+}
+FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) // Component-wise median (middle value) of x, y and z.
+{
+ return max(min(x, y), min(max(x, y), z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
FfxFloat16 ffxReciprocalHalf(FfxFloat16 x)
{
return FFX_BROADCAST_FLOAT16(1.0) / x;
@@ -1614,38 +1775,124 @@ FfxUInt16x4 ffxBitShiftRightHalf(FfxUInt16x4 a, FfxUInt16x4 b)
#if defined(FFX_WAVE)
// Where 'x' must be a compile time literal.
-FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
+FfxFloat32 ffxWaveXorF1(FfxFloat32 v, FfxUInt32 x)
{
return subgroupShuffleXor(v, x);
}
-FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
+FfxFloat32x2 ffxWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
{
return subgroupShuffleXor(v, x);
}
-FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
+FfxFloat32x3 ffxWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
{
return subgroupShuffleXor(v, x);
}
-FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
+FfxFloat32x4 ffxWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
{
return subgroupShuffleXor(v, x);
}
-FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
+FfxUInt32 ffxWaveXorU1(FfxUInt32 v, FfxUInt32 x)
{
return subgroupShuffleXor(v, x);
}
-FfxUInt32x2 AWaveXorU2(FfxUInt32x2 v, FfxUInt32 x)
+FfxUInt32x2 ffxWaveXorU2(FfxUInt32x2 v, FfxUInt32 x)
{
return subgroupShuffleXor(v, x);
}
-FfxUInt32x3 AWaveXorU3(FfxUInt32x3 v, FfxUInt32 x)
+FfxUInt32x3 ffxWaveXorU3(FfxUInt32x3 v, FfxUInt32 x)
{
return subgroupShuffleXor(v, x);
}
-FfxUInt32x4 AWaveXorU4(FfxUInt32x4 v, FfxUInt32 x)
+FfxUInt32x4 ffxWaveXorU4(FfxUInt32x4 v, FfxUInt32 x)
{
return subgroupShuffleXor(v, x);
}
+FfxBoolean ffxWaveIsFirstLane() // True only in the single active invocation selected by subgroupElect().
+{
+ return subgroupElect();
+}
+FfxUInt32 ffxWaveLaneIndex() // Index of this invocation within its subgroup.
+{
+ return gl_SubgroupInvocationID;
+}
+FfxBoolean ffxWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x ) // Returns v as held by the invocation whose subgroup index is x.
+{
+ return subgroupShuffle(v, x);
+}
+FfxUInt32 ffxWavePrefixCountBits(FfxBoolean v) // Number of active invocations with a lower subgroup index than this one for which v is true.
+{
+ return subgroupBallotExclusiveBitCount(subgroupBallot(v));
+}
+FfxUInt32 ffxWaveActiveCountBits(FfxBoolean v) // Number of active invocations in the subgroup for which v is true.
+{
+ return subgroupBallotBitCount(subgroupBallot(v));
+}
+FfxUInt32 ffxWaveReadLaneFirstU1(FfxUInt32 v) // Value of v from the active invocation with the lowest subgroup index.
+{
+ return subgroupBroadcastFirst(v);
+}
+FfxUInt32x2 ffxWaveReadLaneFirstU2(FfxUInt32x2 v) // Value of v from the active invocation with the lowest subgroup index.
+{
+ return subgroupBroadcastFirst(v);
+}
+FfxBoolean ffxWaveReadLaneFirstB1(FfxBoolean v) // Value of v from the active invocation with the lowest subgroup index.
+{
+ return subgroupBroadcastFirst(v);
+}
+FfxUInt32 ffxWaveOr(FfxUInt32 a) // Bitwise OR of a across all active invocations in the subgroup.
+{
+ return subgroupOr(a);
+}
+FfxUInt32 ffxWaveMin(FfxUInt32 a) // Minimum of a across all active invocations in the subgroup.
+{
+ return subgroupMin(a);
+}
+FfxFloat32 ffxWaveMin(FfxFloat32 a) // Minimum of a across all active invocations in the subgroup.
+{
+ return subgroupMin(a);
+}
+FfxUInt32 ffxWaveMax(FfxUInt32 a) // Maximum of a across all active invocations in the subgroup.
+{
+ return subgroupMax(a);
+}
+FfxFloat32 ffxWaveMax(FfxFloat32 a) // Maximum of a across all active invocations in the subgroup.
+{
+ return subgroupMax(a);
+}
+FfxUInt32 ffxWaveSum(FfxUInt32 a) // Sum of a across all active invocations in the subgroup.
+{
+ return subgroupAdd(a);
+}
+FfxFloat32 ffxWaveSum(FfxFloat32 a) // Sum of a across all active invocations in the subgroup.
+{
+ return subgroupAdd(a);
+}
+FfxUInt32 ffxWaveLaneCount() // Number of invocations in a subgroup (gl_SubgroupSize).
+{
+ return gl_SubgroupSize;
+}
+#if defined(FFX_WAVE_ALL_TRUE) // ffxWaveAllTrue is only compiled when the including shader defines FFX_WAVE_ALL_TRUE.
+FfxBoolean ffxWaveAllTrue(FfxBoolean v) // True when v is true in every active invocation of the subgroup.
+{
+ return subgroupAll(v);
+}
+#endif
+FfxFloat32 ffxQuadReadX(FfxFloat32 v) // Returns v from the horizontally adjacent invocation in this invocation's quad.
+{
+ return subgroupQuadSwapHorizontal(v);
+}
+FfxFloat32x2 ffxQuadReadX(FfxFloat32x2 v) // Returns v from the horizontally adjacent invocation in this invocation's quad.
+{
+ return subgroupQuadSwapHorizontal(v);
+}
+FfxFloat32 ffxQuadReadY(FfxFloat32 v) // Returns v from the vertically adjacent invocation in this invocation's quad.
+{
+ return subgroupQuadSwapVertical(v);
+}
+FfxFloat32x2 ffxQuadReadY(FfxFloat32x2 v) // Returns v from the vertically adjacent invocation in this invocation's quad.
+{
+ return subgroupQuadSwapVertical(v);
+}
//------------------------------------------------------------------------------------------------------------------------------
#if FFX_HALF
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h b/thirdparty/amd-ffx/gpu/ffx_core_gpu_common.h
similarity index 89%
rename from thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h
rename to thirdparty/amd-ffx/gpu/ffx_core_gpu_common.h
index ae07642f0df3..da03b07aaf99 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h
+++ b/thirdparty/amd-ffx/gpu/ffx_core_gpu_common.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -21,81 +22,34 @@
/// A define for a true value in a boolean expression.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
#define FFX_TRUE (true)
/// A define for a false value in a boolean expression.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
#define FFX_FALSE (false)
/// A define value for positive infinity.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
#define FFX_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u)
/// A define value for negative infinity.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
#define FFX_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u)
/// A define value for PI.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
#define FFX_PI (3.14159)
+FFX_STATIC const FfxFloat32 FFX_FP16_MIN = 6.10e-05f; // Approximately 2^-14, the smallest positive normalized 16-bit float.
+FFX_STATIC const FfxFloat32 FFX_FP16_MAX = 65504.0f; // Largest finite 16-bit float.
+FFX_STATIC const FfxFloat32 FFX_TONEMAP_EPSILON = 1.0f / FFX_FP16_MAX; // Reciprocal of FFX_FP16_MAX.
-/// Compute the reciprocal of value.
-///
-/// @param [in] value The value to compute the reciprocal of.
-///
-/// @returns
-/// The 1 / value.
-///
-/// @ingroup GPU
-FfxFloat32 ffxReciprocal(FfxFloat32 value)
-{
- return rcp(value);
-}
-
-/// Compute the reciprocal of value.
-///
-/// @param [in] value The value to compute the reciprocal of.
-///
-/// @returns
-/// The 1 / value.
-///
-/// @ingroup GPU
-FfxFloat32x2 ffxReciprocal(FfxFloat32x2 value)
-{
- return rcp(value);
-}
-
-/// Compute the reciprocal of value.
-///
-/// @param [in] value The value to compute the reciprocal of.
-///
-/// @returns
-/// The 1 / value.
-///
-/// @ingroup GPU
-FfxFloat32x3 ffxReciprocal(FfxFloat32x3 value)
-{
- return rcp(value);
-}
-
-/// Compute the reciprocal of value.
-///
-/// @param [in] value The value to compute the reciprocal of.
-///
-/// @returns
-/// The 1 / value.
-///
-/// @ingroup GPU
-FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value)
-{
- return rcp(value);
-}
+#define FFX_HAS_FLAG(v, f) (((v) & (f)) == (f)) // True when every bit of f is set in v; arguments are parenthesized so expressions such as A | B expand correctly.
/// Compute the min of two values.
///
@@ -105,7 +59,7 @@ FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
{
return min(x, y);
@@ -119,7 +73,7 @@ FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y)
{
return min(x, y);
@@ -133,7 +87,7 @@ FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y)
{
return min(x, y);
@@ -147,7 +101,7 @@ FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y)
{
return min(x, y);
@@ -161,7 +115,7 @@ FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y)
{
return min(x, y);
@@ -175,7 +129,7 @@ FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y)
{
return min(x, y);
@@ -189,7 +143,7 @@ FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y)
{
return min(x, y);
@@ -203,7 +157,7 @@ FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y)
{
return min(x, y);
@@ -217,7 +171,7 @@ FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y)
{
return min(x, y);
@@ -231,7 +185,7 @@ FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y)
{
return min(x, y);
@@ -245,7 +199,7 @@ FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y)
{
return min(x, y);
@@ -259,7 +213,7 @@ FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y)
{
return min(x, y);
@@ -273,7 +227,7 @@ FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y)
{
return max(x, y);
@@ -287,7 +241,7 @@ FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y)
{
return max(x, y);
@@ -301,7 +255,7 @@ FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y)
{
return max(x, y);
@@ -315,7 +269,7 @@ FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y)
{
return max(x, y);
@@ -329,7 +283,7 @@ FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y)
{
return max(x, y);
@@ -343,7 +297,7 @@ FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y)
{
return max(x, y);
@@ -357,7 +311,7 @@ FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y)
{
return max(x, y);
@@ -371,7 +325,7 @@ FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y)
{
return max(x, y);
@@ -385,7 +339,7 @@ FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
{
return max(x, y);
@@ -399,7 +353,7 @@ FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y)
{
return max(x, y);
@@ -413,7 +367,7 @@ FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y)
{
return max(x, y);
@@ -427,7 +381,7 @@ FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y)
/// @returns
/// The the lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y)
{
return max(x, y);
@@ -441,7 +395,7 @@ FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y)
/// @returns
/// The value of the first parameter raised to the power of the second.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y)
{
return pow(x, y);
@@ -455,7 +409,7 @@ FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y)
/// @returns
/// The value of the first parameter raised to the power of the second.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y)
{
return pow(x, y);
@@ -469,7 +423,7 @@ FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y)
/// @returns
/// The value of the first parameter raised to the power of the second.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y)
{
return pow(x, y);
@@ -483,7 +437,7 @@ FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y)
/// @returns
/// The value of the first parameter raised to the power of the second.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y)
{
return pow(x, y);
@@ -496,7 +450,7 @@ FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y)
/// @returns
/// The the square root of x.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxSqrt(FfxFloat32 x)
{
return sqrt(x);
@@ -509,7 +463,7 @@ FfxFloat32 ffxSqrt(FfxFloat32 x)
/// @returns
/// The the square root of x.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxSqrt(FfxFloat32x2 x)
{
return sqrt(x);
@@ -522,7 +476,7 @@ FfxFloat32x2 ffxSqrt(FfxFloat32x2 x)
/// @returns
/// The the square root of x.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxSqrt(FfxFloat32x3 x)
{
return sqrt(x);
@@ -535,7 +489,7 @@ FfxFloat32x3 ffxSqrt(FfxFloat32x3 x)
/// @returns
/// The the square root of x.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxSqrt(FfxFloat32x4 x)
{
return sqrt(x);
@@ -549,7 +503,7 @@ FfxFloat32x4 ffxSqrt(FfxFloat32x4 x)
/// @returns
/// The value of d with the sign bit from s.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s)
{
return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & FfxUInt32(0x80000000u)));
@@ -563,7 +517,7 @@ FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s)
/// @returns
/// The value of d with the sign bit from s.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s)
{
return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast2(0x80000000u)));
@@ -577,7 +531,7 @@ FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s)
/// @returns
/// The value of d with the sign bit from s.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s)
{
return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast3(0x80000000u)));
@@ -591,7 +545,7 @@ FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s)
/// @returns
/// The value of d with the sign bit from s.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s)
{
return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast4(0x80000000u)));
@@ -614,7 +568,7 @@ FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s)
/// @returns
/// 1.0 when the value is negative, or 0.0 when the value is 0 or position.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxIsSigned(FfxFloat32 m)
{
return ffxSaturate(m * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT));
@@ -637,7 +591,7 @@ FfxFloat32 ffxIsSigned(FfxFloat32 m)
/// @returns
/// 1.0 when the value is negative, or 0.0 when the value is 0 or position.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m)
{
return ffxSaturate(m * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT));
@@ -660,7 +614,7 @@ FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m)
/// @returns
/// 1.0 when the value is negative, or 0.0 when the value is 0 or position.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m)
{
return ffxSaturate(m * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT));
@@ -683,7 +637,7 @@ FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m)
/// @returns
/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m)
{
return ffxSaturate(m * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT));
@@ -701,7 +655,7 @@ FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m)
/// @returns
/// 1.0 when the value is position, or 0.0 when the value is 0 or negative.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m)
{
return ffxSaturate(m * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT));
@@ -719,7 +673,7 @@ FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m)
/// @returns
/// 1.0 when the value is position, or 0.0 when the value is 0 or negative.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m)
{
return ffxSaturate(m * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT));
@@ -737,7 +691,7 @@ FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m)
/// @returns
/// 1.0 when the value is position, or 0.0 when the value is 0 or negative.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m)
{
return ffxSaturate(m * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT));
@@ -755,7 +709,7 @@ FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m)
/// @returns
/// 1.0 when the value is position, or 0.0 when the value is 0 or negative.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m)
{
return ffxSaturate(m * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT));
@@ -775,10 +729,10 @@ FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m)
/// @returns
/// The sortable integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value)
{
- return value ^ ((AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000));
+ return value ^ ((ffxAShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000));
}
/// Convert a sortable integer to a 32bit floating point value.
@@ -792,10 +746,10 @@ FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value)
/// @returns
/// The sortable integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value)
{
- return value ^ ((~AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000));
+ return value ^ ((~ffxAShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000));
}
/// Calculate a low-quality approximation for the square root of a value.
@@ -811,10 +765,10 @@ FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value)
/// @returns
/// An approximation of the square root, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32 ffxApproximateSqrt(FfxFloat32 a)
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateSqrt(FfxFloat32 value)
{
- return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639));
+ return ffxAsFloat((ffxAsUInt32(value) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639));
}
/// Calculate a low-quality approximation for the reciprocal of a value.
@@ -830,10 +784,10 @@ FfxFloat32 ffxApproximateSqrt(FfxFloat32 a)
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32 ffxApproximateReciprocal(FfxFloat32 a)
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateReciprocal(FfxFloat32 value)
{
- return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(a));
+ return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(value));
}
/// Calculate a medium-quality approximation for the reciprocal of a value.
@@ -849,7 +803,7 @@ FfxFloat32 ffxApproximateReciprocal(FfxFloat32 a)
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value)
{
FfxFloat32 b = ffxAsFloat(FfxUInt32(0x7ef19fff) - ffxAsUInt32(value));
@@ -869,10 +823,10 @@ FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value)
/// @returns
/// An approximation of the reciprocal square root, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 a)
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 value)
{
- return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(a) >> FfxUInt32(1)));
+ return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(value) >> FfxUInt32(1)));
}
/// Calculate a low-quality approximation for the square root of a value.
@@ -888,10 +842,10 @@ FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 a)
/// @returns
/// An approximation of the square root, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 a)
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 value)
{
- return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u));
+ return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u));
}
/// Calculate a low-quality approximation for the reciprocal of a value.
@@ -907,10 +861,10 @@ FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 a)
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 a)
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 value)
{
- return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(a));
+ return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(value));
}
/// Calculate a medium-quality approximation for the reciprocal of a value.
@@ -926,11 +880,11 @@ FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 a)
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
-/// @ingroup GPU
-FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 a)
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 value)
{
- FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(a));
- return b * (-b * a + ffxBroadcast2(2.0f));
+ FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(value));
+ return b * (-b * value + ffxBroadcast2(2.0f));
}
/// Calculate a low-quality approximation for the square root of a value.
@@ -946,10 +900,10 @@ FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 a)
/// @returns
/// An approximation of the square root, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 a)
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 value)
{
- return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast2(1u)));
+ return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast2(1u)));
}
/// Calculate a low-quality approximation for the square root of a value.
@@ -965,10 +919,10 @@ FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 a)
/// @returns
/// An approximation of the square root, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 a)
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 value)
{
- return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u));
+ return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u));
}
/// Calculate a low-quality approximation for the reciprocal of a value.
@@ -984,10 +938,10 @@ FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 a)
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 a)
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 value)
{
- return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(a));
+ return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(value));
}
/// Calculate a medium-quality approximation for the reciprocal of a value.
@@ -1003,11 +957,11 @@ FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 a)
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
-/// @ingroup GPU
-FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 a)
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 value)
{
- FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(a));
- return b * (-b * a + ffxBroadcast3(2.0f));
+ FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(value));
+ return b * (-b * value + ffxBroadcast3(2.0f));
}
/// Calculate a low-quality approximation for the square root of a value.
@@ -1023,10 +977,10 @@ FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 a)
/// @returns
/// An approximation of the square root, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 a)
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 value)
{
- return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast3(1u)));
+ return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast3(1u)));
}
/// Calculate a low-quality approximation for the square root of a value.
@@ -1042,10 +996,10 @@ FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 a)
/// @returns
/// An approximation of the square root, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 a)
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 value)
{
- return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u));
+ return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u));
}
/// Calculate a low-quality approximation for the reciprocal of a value.
@@ -1061,10 +1015,10 @@ FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 a)
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 a)
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 value)
{
- return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(a));
+ return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(value));
}
/// Calculate a medium-quality approximation for the reciprocal of a value.
@@ -1080,11 +1034,11 @@ FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 a)
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
-/// @ingroup GPU
-FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 a)
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 value)
{
- FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(a));
- return b * (-b * a + ffxBroadcast4(2.0f));
+ FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(value));
+ return b * (-b * value + ffxBroadcast4(2.0f));
}
/// Calculate a low-quality approximation for the square root of a value.
@@ -1100,10 +1054,10 @@ FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 a)
/// @returns
/// An approximation of the square root, estimated to low quality.
///
-/// @ingroup GPU
-FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 a)
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 value)
{
- return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast4(1u)));
+ return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast4(1u)));
}
/// Calculate dot product of 'a' and 'b'.
@@ -1114,7 +1068,7 @@ FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 a)
/// @returns
/// The value of a dot b.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b)
{
return dot(a, b);
@@ -1128,7 +1082,7 @@ FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b)
/// @returns
/// The value of a dot b.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b)
{
return dot(a, b);
@@ -1142,7 +1096,7 @@ FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b)
/// @returns
/// The value of a dot b.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
{
return dot(a, b);
@@ -1160,7 +1114,7 @@ FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
/// @returns
/// The value a converted into Gamma2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a)
{
return a * a * a * a;
@@ -1177,7 +1131,7 @@ FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a)
/// @returns
/// The value a converted into linear.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a)
{
return a * a * a * a * a * a * a * a;
@@ -1194,7 +1148,7 @@ FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a)
{
return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46));
@@ -1211,7 +1165,7 @@ FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a)
{
FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46));
@@ -1230,7 +1184,7 @@ FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a)
{
return ffxSqrt(ffxSqrt(a));
@@ -1247,7 +1201,7 @@ FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a)
{
return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723));
@@ -1264,7 +1218,7 @@ FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a)
{
FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723));
@@ -1283,7 +1237,7 @@ FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a)
{
return ffxSqrt(ffxSqrt(ffxSqrt(a)));
@@ -1300,7 +1254,7 @@ FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a)
/// @returns
/// The value a converted into Gamma2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a)
{
return a * a * a * a;
@@ -1317,7 +1271,7 @@ FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a)
/// @returns
/// The value a converted into linear.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a)
{
return a * a * a * a * a * a * a * a;
@@ -1334,7 +1288,7 @@ FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a)
{
return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u));
@@ -1351,7 +1305,7 @@ FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a)
{
FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u));
@@ -1370,7 +1324,7 @@ FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a)
{
return ffxSqrt(ffxSqrt(a));
@@ -1387,7 +1341,7 @@ FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a)
{
return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u));
@@ -1404,7 +1358,7 @@ FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a)
{
FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u));
@@ -1423,7 +1377,7 @@ FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a)
{
return ffxSqrt(ffxSqrt(ffxSqrt(a)));
@@ -1440,7 +1394,7 @@ FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a)
/// @returns
/// The value a converted into Gamma2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a)
{
return a * a * a * a;
@@ -1457,7 +1411,7 @@ FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a)
/// @returns
/// The value a converted into linear.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a)
{
return a * a * a * a * a * a * a * a;
@@ -1474,7 +1428,7 @@ FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a)
{
return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u));
@@ -1491,7 +1445,7 @@ FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a)
{
FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u));
@@ -1510,7 +1464,7 @@ FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a)
{
return ffxSqrt(ffxSqrt(a));
@@ -1527,7 +1481,7 @@ FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a)
{
return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u));
@@ -1544,7 +1498,7 @@ FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a)
{
FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u));
@@ -1563,7 +1517,7 @@ FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a)
{
return ffxSqrt(ffxSqrt(ffxSqrt(a)));
@@ -1580,7 +1534,7 @@ FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a)
/// @returns
/// The value a converted into Gamma2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a)
{
return a * a * a * a;
@@ -1597,7 +1551,7 @@ FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a)
/// @returns
/// The value a converted into linear.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a)
{
return a * a * a * a * a * a * a * a;
@@ -1614,7 +1568,7 @@ FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a)
{
return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u));
@@ -1631,7 +1585,7 @@ FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a)
{
FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u));
@@ -1650,7 +1604,7 @@ FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a)
{
return ffxSqrt(ffxSqrt(a));
@@ -1667,7 +1621,7 @@ FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a)
{
return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u));
@@ -1684,7 +1638,7 @@ FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a)
{
FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u));
@@ -1703,7 +1657,7 @@ FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a)
/// @returns
/// The value a converted into PQ.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a)
{
return ffxSqrt(ffxSqrt(ffxSqrt(a)));
@@ -1793,7 +1747,7 @@ FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y)
{
return min(x, y);
@@ -1807,7 +1761,7 @@ FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y)
{
return min(x, y);
@@ -1821,7 +1775,7 @@ FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y)
{
return min(x, y);
@@ -1835,7 +1789,7 @@ FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y)
{
return min(x, y);
@@ -1844,12 +1798,11 @@ FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y)
/// Conditional free logic NOT operation using two values.
///
/// @param [in] x The first value to be fed into the NOT operator.
-/// @param [in] y The second value to be fed into the NOT operator.
///
/// @returns
/// Result of the NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxZeroOneAnd(FfxUInt32 x)
{
return x ^ FfxUInt32(1);
@@ -1858,12 +1811,11 @@ FfxUInt32 ffxZeroOneAnd(FfxUInt32 x)
/// Conditional free logic NOT operation using two values.
///
/// @param [in] x The first value to be fed into the NOT operator.
-/// @param [in] y The second value to be fed into the NOT operator.
///
/// @returns
/// Result of the NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x)
{
return x ^ ffxBroadcast2(1u);
@@ -1872,12 +1824,11 @@ FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x)
/// Conditional free logic NOT operation using two values.
///
/// @param [in] x The first value to be fed into the NOT operator.
-/// @param [in] y The second value to be fed into the NOT operator.
///
/// @returns
/// Result of the NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x)
{
return x ^ ffxBroadcast3(1u);
@@ -1886,12 +1837,11 @@ FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x)
/// Conditional free logic NOT operation using two values.
///
/// @param [in] x The first value to be fed into the NOT operator.
-/// @param [in] y The second value to be fed into the NOT operator.
///
/// @returns
/// Result of the NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x)
{
return x ^ ffxBroadcast4(1u);
@@ -1905,7 +1855,7 @@ FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y)
{
return max(x, y);
@@ -1919,7 +1869,7 @@ FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y)
{
return max(x, y);
@@ -1933,7 +1883,7 @@ FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y)
{
return max(x, y);
@@ -1947,7 +1897,7 @@ FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y)
{
return max(x, y);
@@ -1960,7 +1910,7 @@ FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x)
{
return FfxUInt32(FfxFloat32(1.0) - x);
@@ -1973,7 +1923,7 @@ FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x)
{
return FfxUInt32x2(ffxBroadcast2(1.0) - x);
@@ -1986,7 +1936,7 @@ FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x)
{
return FfxUInt32x3(ffxBroadcast3(1.0) - x);
@@ -1999,7 +1949,7 @@ FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x)
{
return FfxUInt32x4(ffxBroadcast4(1.0) - x);
@@ -2015,7 +1965,7 @@ FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
return ffxSaturate(x * y + z);
@@ -2031,7 +1981,7 @@ FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
return ffxSaturate(x * y + z);
@@ -2047,7 +1997,7 @@ FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
return ffxSaturate(x * y + z);
@@ -2063,7 +2013,7 @@ FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
return ffxSaturate(x * y + z);
@@ -2076,7 +2026,7 @@ FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
/// @returns
/// Result of the greater than zero comparison.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x)
{
return ffxSaturate(x * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT));
@@ -2089,7 +2039,7 @@ FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x)
/// @returns
/// Result of the greater than zero comparison.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x)
{
return ffxSaturate(x * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT));
@@ -2102,7 +2052,7 @@ FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x)
/// @returns
/// Result of the greater than zero comparison.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x)
{
return ffxSaturate(x * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT));
@@ -2115,7 +2065,7 @@ FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x)
/// @returns
/// Result of the greater than zero comparison.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x)
{
return ffxSaturate(x * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT));
@@ -2128,7 +2078,7 @@ FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxZeroOneAnd(FfxFloat32 x)
{
return FfxFloat32(1.0) - x;
@@ -2141,7 +2091,7 @@ FfxFloat32 ffxZeroOneAnd(FfxFloat32 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x)
{
return ffxBroadcast2(1.0) - x;
@@ -2154,7 +2104,7 @@ FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x)
{
return ffxBroadcast3(1.0) - x;
@@ -2167,7 +2117,7 @@ FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x)
{
return ffxBroadcast4(1.0) - x;
@@ -2181,7 +2131,7 @@ FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y)
{
return max(x, y);
@@ -2195,7 +2145,7 @@ FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y)
{
return max(x, y);
@@ -2209,7 +2159,7 @@ FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y)
{
return max(x, y);
@@ -2223,7 +2173,7 @@ FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y)
{
return max(x, y);
@@ -2238,7 +2188,7 @@ FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y)
/// @returns
/// The selected value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
FfxFloat32 r = (-x) * z + z;
@@ -2254,7 +2204,7 @@ FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
/// @returns
/// The selected value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
FfxFloat32x2 r = (-x) * z + z;
@@ -2270,7 +2220,7 @@ FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
/// @returns
/// The selected value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
FfxFloat32x3 r = (-x) * z + z;
@@ -2286,7 +2236,7 @@ FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
/// @returns
/// The selected value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
FfxFloat32x4 r = (-x) * z + z;
@@ -2300,7 +2250,7 @@ FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
/// @returns
/// Result of the sign value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x)
{
return ffxSaturate(x * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT));
@@ -2313,7 +2263,7 @@ FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x)
/// @returns
/// Result of the sign value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x)
{
return ffxSaturate(x * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT));
@@ -2326,7 +2276,7 @@ FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x)
/// @returns
/// Result of the sign value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x)
{
return ffxSaturate(x * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT));
@@ -2339,7 +2289,7 @@ FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x)
/// @returns
/// Result of the sign value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x)
{
return ffxSaturate(x * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT));
@@ -2358,7 +2308,7 @@ FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x)
/// @returns
/// The color in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32 ffxRec709FromLinear(FfxFloat32 color)
{
FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
@@ -2379,7 +2329,7 @@ FfxFloat32 ffxRec709FromLinear(FfxFloat32 color)
/// @returns
/// The color in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color)
{
FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
@@ -2400,7 +2350,7 @@ FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color)
/// @returns
/// The color in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color)
{
FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
@@ -2408,6 +2358,51 @@ FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color)
return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy);
}
+/// Compute a linear value from a REC.709 value.
+///
+/// @param [in] color The value to convert to linear from REC.709.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromRec709(FfxFloat32 color)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+ FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z));
+}
+
+/// Compute a linear value from a REC.709 value.
+///
+/// @param [in] color The value to convert to linear from REC.709.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+ FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz));
+}
+
+/// Compute a linear value from a REC.709 value.
+///
+/// @param [in] color The value to convert to linear from REC.709.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+ FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz));
+}
+
/// Compute a gamma value from a linear value.
///
/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
@@ -2420,10 +2415,10 @@ FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color)
/// @returns
/// A value in gamma space.
///
-/// @ingroup GPU
-FfxFloat32 ffxGammaFromLinear(FfxFloat32 color, FfxFloat32 rcpX)
+/// @ingroup GPUCore
+FfxFloat32 ffxGammaFromLinear(FfxFloat32 value, FfxFloat32 power)
{
- return pow(color, FfxFloat32(rcpX));
+ return pow(value, FfxFloat32(power));
}
/// Compute a gamma value from a linear value.
@@ -2438,10 +2433,10 @@ FfxFloat32 ffxGammaFromLinear(FfxFloat32 color, FfxFloat32 rcpX)
/// @returns
/// A value in gamma space.
///
-/// @ingroup GPU
-FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 color, FfxFloat32 rcpX)
+/// @ingroup GPUCore
+FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 value, FfxFloat32 power)
{
- return pow(color, ffxBroadcast2(rcpX));
+ return pow(value, ffxBroadcast2(power));
}
/// Compute a gamma value from a linear value.
@@ -2456,238 +2451,193 @@ FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 color, FfxFloat32 rcpX)
/// @returns
/// A value in gamma space.
///
-/// @ingroup GPU
-FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 color, FfxFloat32 rcpX)
+/// @ingroup GPUCore
+FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 value, FfxFloat32 power)
{
- return pow(color, ffxBroadcast3(rcpX));
+ return pow(value, ffxBroadcast3(power));
}
-/// Compute a PQ value from a linear value.
-///
-/// @param [in] value The value to convert to PQ from linear.
-///
-/// @returns
-/// A value in linear space.
+/// Compute a linear value from a value in a gamma space.
///
-/// @ingroup GPU
-FfxFloat32 ffxPQToLinear(FfxFloat32 x)
-{
- FfxFloat32 p = pow(x, FfxFloat32(0.159302));
- return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438));
-}
-
-/// Compute a PQ value from a linear value.
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
///
-/// @param [in] value The value to convert to PQ from linear.
+/// @param [in] color The value to convert to linear in gamma space.
+/// @param [in] power The power value used for the gamma curve.
///
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
-FfxFloat32x2 ffxPQToLinear(FfxFloat32x2 x)
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power)
{
- FfxFloat32x2 p = pow(x, ffxBroadcast2(0.159302));
- return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438));
+ return pow(color, FfxFloat32(power));
}
-/// Compute a PQ value from a linear value.
-///
-/// @param [in] value The value to convert to PQ from linear.
-///
-/// @returns
-/// A value in linear space.
+/// Compute a linear value from a value in a gamma space.
///
-/// @ingroup GPU
-FfxFloat32x3 ffxPQToLinear(FfxFloat32x3 x)
-{
- FfxFloat32x3 p = pow(x, ffxBroadcast3(0.159302));
- return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438));
-}
-
-/// Compute a linear value from a SRGB value.
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
///
-/// @param [in] value The value to convert to linear from SRGB.
+/// @param [in] color The value to convert to linear in gamma space.
+/// @param [in] power The power value used for the gamma curve.
///
/// @returns
-/// A value in SRGB space.
+/// A value in linear space.
///
-/// @ingroup GPU
-FfxFloat32 ffxSrgbToLinear(FfxFloat32 color)
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power)
{
- FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
- FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
- return clamp(j.x, color * j.y, pow(color, j.z) * k.x + k.y);
+ return pow(color, ffxBroadcast2(power));
}
-/// Compute a linear value from a SRGB value.
-///
-/// @param [in] value The value to convert to linear from SRGB.
-///
-/// @returns
-/// A value in SRGB space.
+/// Compute a linear value from a value in a gamma space.
///
-/// @ingroup GPU
-FfxFloat32x2 ffxSrgbToLinear(FfxFloat32x2 color)
-{
- FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
- FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
- return clamp(j.xx, color * j.yy, pow(color, j.zz) * k.xx + k.yy);
-}
-
-/// Compute a linear value from a SRGB value.
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
///
-/// @param [in] value The value to convert to linear from SRGB.
+/// @param [in] color The value to convert to linear in gamma space.
+/// @param [in] power The power value used for the gamma curve.
///
/// @returns
-/// A value in SRGB space.
+/// A value in linear space.
///
-/// @ingroup GPU
-FfxFloat32x3 ffxSrgbToLinear(FfxFloat32x3 color)
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power)
{
- FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
- FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
- return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy);
+ return pow(color, ffxBroadcast3(power));
}
-/// Compute a linear value from a REC.709 value.
+/// Compute a PQ value from a linear value.
///
-/// @param [in] color The value to convert to linear from REC.709.
+/// @param [in] value The value to convert to PQ from linear.
///
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
-FfxFloat32 ffxLinearFromRec709(FfxFloat32 color)
+/// @ingroup GPUCore
+FfxFloat32 ffxPQFromLinear(FfxFloat32 value)
{
- FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
- FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
- return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z));
+ FfxFloat32 p = pow(value, FfxFloat32(0.159302));
+ return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438));
}
-/// Compute a linear value from a REC.709 value.
+/// Compute a PQ value from a linear value.
///
-/// @param [in] color The value to convert to linear from REC.709.
+/// @param [in] value The value to convert to PQ from linear.
///
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
-FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color)
+/// @ingroup GPUCore
+FfxFloat32x2 ffxPQFromLinear(FfxFloat32x2 value)
{
- FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
- FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
- return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz));
+ FfxFloat32x2 p = pow(value, ffxBroadcast2(0.159302));
+ return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438));
}
-/// Compute a linear value from a REC.709 value.
+/// Compute a PQ value from a linear value.
///
-/// @param [in] color The value to convert to linear from REC.709.
+/// @param [in] value The value to convert to PQ from linear.
///
/// @returns
-/// A value in linear space.
+/// A value in PQ space.
///
-/// @ingroup GPU
-FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color)
+/// @ingroup GPUCore
+FfxFloat32x3 ffxPQFromLinear(FfxFloat32x3 value)
{
- FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
- FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
- return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz));
+ FfxFloat32x3 p = pow(value, ffxBroadcast3(0.159302));
+ return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438));
}
-/// Compute a linear value from a value in a gamma space.
+/// Compute a linear value from a value in a PQ space.
///
-/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+/// This is the inverse of the curve applied by ffxPQFromLinear.
///
-/// @param [in] color The value to convert to linear in gamma space.
-/// @param [in] power The power value used for the gamma curve.
+/// @param [in] value The value to convert to linear in PQ space.
///
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
-FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power)
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromPQ(FfxFloat32 value)
{
- return pow(color, FfxFloat32(power));
+ FfxFloat32 p = pow(value, FfxFloat32(0.0126833));
+ return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739));
}
-/// Compute a linear value from a value in a gamma space.
+/// Compute a linear value from a value in a PQ space.
///
-/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+/// This is the inverse of the curve applied by ffxPQFromLinear.
///
-/// @param [in] color The value to convert to linear in gamma space.
-/// @param [in] power The power value used for the gamma curve.
+/// @param [in] value The value to convert to linear in PQ space.
///
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
-FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power)
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 value)
{
- return pow(color, ffxBroadcast2(power));
+ FfxFloat32x2 p = pow(value, ffxBroadcast2(0.0126833));
+ return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739));
}
-/// Compute a linear value from a value in a gamma space.
+/// Compute a linear value from a value in a PQ space.
///
-/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+/// This is the inverse of the curve applied by ffxPQFromLinear.
///
-/// @param [in] color The value to convert to linear in gamma space.
-/// @param [in] power The power value used for the gamma curve.
+/// @param [in] value The value to convert to linear in PQ space.
///
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
-FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power)
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 value)
{
- return pow(color, ffxBroadcast3(power));
+ FfxFloat32x3 p = pow(value, ffxBroadcast3(0.0126833));
+ return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739));
}
-/// Compute a linear value from a value in a PQ space.
-///
-/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+/// Compute an SRGB value from a linear value.
///
-/// @param [in] value The value to convert to linear in PQ space.
+/// @param [in] value The value to convert to SRGB from linear.
///
/// @returns
-/// A value in linear space.
+/// A value in SRGB space.
///
-/// @ingroup GPU
-FfxFloat32 ffxLinearFromPQ(FfxFloat32 x)
+/// @ingroup GPUCore
+FfxFloat32 ffxSrgbFromLinear(FfxFloat32 value)
{
- FfxFloat32 p = pow(x, FfxFloat32(0.0126833));
- return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739));
+ FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+ FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+ return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y);
}
-/// Compute a linear value from a value in a PQ space.
+/// Compute an SRGB value from a linear value.
///
-/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
-///
-/// @param [in] value The value to convert to linear in PQ space.
+/// @param [in] value The value to convert to SRGB from linear.
///
/// @returns
-/// A value in linear space.
+/// A value in SRGB space.
///
-/// @ingroup GPU
-FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 x)
+/// @ingroup GPUCore
+FfxFloat32x2 ffxSrgbFromLinear(FfxFloat32x2 value)
{
- FfxFloat32x2 p = pow(x, ffxBroadcast2(0.0126833));
- return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739));
+ FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+ FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+ return clamp(j.xx, value * j.yy, pow(value, j.zz) * k.xx + k.yy);
}
-/// Compute a linear value from a value in a PQ space.
+/// Compute an SRGB value from a linear value.
///
-/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
-///
-/// @param [in] value The value to convert to linear in PQ space.
+/// @param [in] value The value to convert to SRGB from linear.
///
/// @returns
-/// A value in linear space.
+/// A value in SRGB space.
///
-/// @ingroup GPU
-FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 x)
+/// @ingroup GPUCore
+FfxFloat32x3 ffxSrgbFromLinear(FfxFloat32x3 value)
{
- FfxFloat32x3 p = pow(x, ffxBroadcast3(0.0126833));
- return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739));
+ FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+ FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+ return clamp(j.xxx, value * j.yyy, pow(value, j.zzz) * k.xxx + k.yyy);
}
/// Compute a linear value from a value in a SRGB space.
@@ -2699,12 +2649,12 @@ FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 x)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
-FfxFloat32 ffxLinearFromSrgb(FfxFloat32 color)
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
{
- FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+ FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);
FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
- return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z));
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z));
}
/// Compute a linear value from a value in a SRGB space.
@@ -2716,12 +2666,12 @@ FfxFloat32 ffxLinearFromSrgb(FfxFloat32 color)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
-FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 color)
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
{
- FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+ FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);
FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
- return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz));
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz));
}
/// Compute a linear value from a value in a SRGB space.
@@ -2733,30 +2683,32 @@ FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 color)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
-FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 color)
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value)
{
- FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+ FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);
FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
- return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz));
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz));
}
/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear.
-///
-/// 543210
-/// ======
-/// ..xxx.
-/// yy...y
+///
+/// Remap illustration:
+///
+/// 543210
+/// ~~~~~~
+/// ..xxx.
+/// yy...y
///
/// @param [in] a The input 1D coordinates to remap.
///
/// @returns
/// The remapped 2D coordinates.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a)
{
- return FfxUInt32x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u));
+ return FfxUInt32x2(ffxBitfieldExtract(a, 1u, 3u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), a, 1u));
}
/// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions.
@@ -2777,8 +2729,8 @@ FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a)
/// @returns
/// The remapped 2D coordinates.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a)
{
- return FfxUInt32x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u));
+ return FfxUInt32x2(ffxBitfieldInsertMask(ffxBitfieldExtract(a, 2u, 3u), a, 1u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), ffxBitfieldExtract(a, 1u, 2u), 2u));
}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h b/thirdparty/amd-ffx/gpu/ffx_core_gpu_common_half.h
similarity index 95%
rename from thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h
rename to thirdparty/amd-ffx/gpu/ffx_core_gpu_common_half.h
index c46ccb36575d..6b071d590f5a 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h
+++ b/thirdparty/amd-ffx/gpu/ffx_core_gpu_common_half.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -20,27 +21,27 @@
// THE SOFTWARE.
#if FFX_HALF
-#if FFX_HLSL_6_2
+#if FFX_HLSL_SM >= 62
/// A define value for 16bit positive infinity.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0x7c00u)
/// A define value for 16bit negative infinity.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0xfc00u)
#else
/// A define value for 16bit positive infinity.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16(0x7c00u)
/// A define value for 16bit negative infinity.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16(0xfc00u)
-#endif // FFX_HLSL_6_2
+#endif // #if FFX_HLSL_SM>=62
/// Compute the min of two values.
///
@@ -50,7 +51,7 @@
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y)
{
return min(x, y);
@@ -64,7 +65,7 @@ FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y)
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y)
{
return min(x, y);
@@ -78,7 +79,7 @@ FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y)
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y)
{
return min(x, y);
@@ -92,7 +93,7 @@ FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y)
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y)
{
return min(x, y);
@@ -106,7 +107,7 @@ FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y)
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y)
{
return min(x, y);
@@ -120,7 +121,7 @@ FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y)
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y)
{
return min(x, y);
@@ -134,7 +135,7 @@ FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y)
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y)
{
return min(x, y);
@@ -148,7 +149,7 @@ FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y)
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y)
{
return min(x, y);
@@ -162,7 +163,7 @@ FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y)
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y)
{
return min(x, y);
@@ -176,7 +177,7 @@ FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y)
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y)
{
return min(x, y);
@@ -190,7 +191,7 @@ FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y)
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y)
{
return min(x, y);
@@ -204,7 +205,7 @@ FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y)
/// @returns
-/// The the lowest of two values.
+/// The lowest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y)
{
return min(x, y);
@@ -218,7 +219,7 @@ FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y)
{
return max(x, y);
@@ -232,7 +233,7 @@ FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y)
{
return max(x, y);
@@ -246,7 +247,7 @@ FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y)
{
return max(x, y);
@@ -260,7 +261,7 @@ FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y)
{
return max(x, y);
@@ -274,7 +275,7 @@ FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y)
{
return max(x, y);
@@ -288,7 +289,7 @@ FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y)
{
return max(x, y);
@@ -302,7 +303,7 @@ FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y)
{
return max(x, y);
@@ -316,7 +317,7 @@ FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y)
{
return max(x, y);
@@ -330,7 +331,7 @@ FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y)
{
return max(x, y);
@@ -344,7 +345,7 @@ FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y)
{
return max(x, y);
@@ -358,7 +359,7 @@ FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y)
{
return max(x, y);
@@ -372,7 +373,7 @@ FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y)
/// @returns
-/// The the lowest of two values.
+/// The highest of two values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y)
{
return max(x, y);
@@ -386,7 +387,7 @@ FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y)
/// @returns
/// The value of the first parameter raised to the power of the second.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y)
{
return pow(x, y);
@@ -400,7 +401,7 @@ FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y)
/// @returns
/// The value of the first parameter raised to the power of the second.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y)
{
return pow(x, y);
@@ -414,7 +415,7 @@ FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y)
/// @returns
/// The value of the first parameter raised to the power of the second.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y)
{
return pow(x, y);
@@ -428,7 +429,7 @@ FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y)
/// @returns
/// The value of the first parameter raised to the power of the second.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y)
{
return pow(x, y);
@@ -441,7 +442,7 @@ FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y)
/// @returns
-/// The the square root of x.
+/// The square root of x.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxSqrt(FfxFloat16 x)
{
return sqrt(x);
@@ -454,7 +455,7 @@ FfxFloat16 ffxSqrt(FfxFloat16 x)
/// @returns
-/// The the square root of x.
+/// The square root of x.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxSqrt(FfxFloat16x2 x)
{
return sqrt(x);
@@ -467,7 +468,7 @@ FfxFloat16x2 ffxSqrt(FfxFloat16x2 x)
/// @returns
-/// The the square root of x.
+/// The square root of x.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxSqrt(FfxFloat16x3 x)
{
return sqrt(x);
@@ -480,7 +481,7 @@ FfxFloat16x3 ffxSqrt(FfxFloat16x3 x)
/// @returns
-/// The the square root of x.
+/// The square root of x.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxSqrt(FfxFloat16x4 x)
{
return sqrt(x);
@@ -494,7 +495,7 @@ FfxFloat16x4 ffxSqrt(FfxFloat16x4 x)
/// @returns
/// The value of d with the sign bit from s.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s)
{
return FFX_TO_FLOAT16(FFX_TO_UINT16(d) | (FFX_TO_UINT16(s) & FFX_BROADCAST_UINT16(0x8000u)));
@@ -508,7 +509,7 @@ FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s)
/// @returns
/// The value of d with the sign bit from s.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s)
{
return FFX_TO_FLOAT16X2(FFX_TO_UINT16X2(d) | (FFX_TO_UINT16X2(s) & FFX_BROADCAST_UINT16X2(0x8000u)));
@@ -522,7 +523,7 @@ FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s)
/// @returns
/// The value of d with the sign bit from s.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s)
{
return FFX_TO_FLOAT16X3(FFX_TO_UINT16X3(d) | (FFX_TO_UINT16X3(s) & FFX_BROADCAST_UINT16X3(0x8000u)));
@@ -536,7 +537,7 @@ FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s)
/// @returns
/// The value of d with the sign bit from s.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s)
{
return FFX_TO_FLOAT16X4(FFX_TO_UINT16X4(d) | (FFX_TO_UINT16X4(s) & FFX_BROADCAST_UINT16X4(0x8000u)));
@@ -559,7 +560,7 @@ FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s)
/// @returns
-/// 1.0 when the value is negative, or 0.0 when the value is 0 or position.
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxIsSignedHalf(FfxFloat16 m)
{
return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF));
@@ -582,7 +583,7 @@ FfxFloat16 ffxIsSignedHalf(FfxFloat16 m)
/// @returns
-/// 1.0 when the value is negative, or 0.0 when the value is 0 or position.
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m)
{
return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF));
@@ -605,7 +606,7 @@ FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m)
/// @returns
-/// 1.0 when the value is negative, or 0.0 when the value is 0 or position.
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m)
{
return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF));
@@ -628,7 +629,7 @@ FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m)
/// @returns
-/// 1.0 when the value is negative, or 0.0 when the value is 0 or position.
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m)
{
return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF));
@@ -646,7 +647,7 @@ FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m)
/// @returns
-/// 1.0 when the value is position, or 0.0 when the value is 0 or negative.
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m)
{
return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF));
@@ -664,7 +665,7 @@ FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m)
/// @returns
-/// 1.0 when the value is position, or 0.0 when the value is 0 or negative.
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m)
{
return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF));
@@ -682,7 +683,7 @@ FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m)
/// @returns
-/// 1.0 when the value is position, or 0.0 when the value is 0 or negative.
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m)
{
return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF));
@@ -700,7 +701,7 @@ FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m)
/// @returns
-/// 1.0 when the value is position, or 0.0 when the value is 0 or negative.
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m)
{
return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF));
@@ -720,7 +721,7 @@ FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m)
/// @returns
/// The sortable integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x)
{
return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000));
@@ -737,7 +738,7 @@ FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x)
/// @returns
/// The floating point value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x)
{
return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000));
@@ -757,7 +758,7 @@ FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x)
/// @returns
/// The sortable integer values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x)
{
return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000));
@@ -774,7 +775,7 @@ FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x)
/// @returns
/// The floating point values.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x)
{
return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000));
@@ -790,7 +791,7 @@ FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i)
{
return ((i.x) & 0xffu) | ((i.y << 16) & 0xff0000u);
@@ -806,7 +807,7 @@ FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i)
{
return ((i.x >> 8) & 0xffu) | ((i.y << 8) & 0xff0000u);
@@ -822,7 +823,7 @@ FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i)
{
return ((i.x >> 16) & 0xffu) | ((i.y) & 0xff0000u);
@@ -838,7 +839,7 @@ FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i)
{
return ((i.x >> 24) & 0xffu) | ((i.y >> 8) & 0xff0000u);
@@ -854,7 +855,7 @@ FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i)
{
return ((i.x) & 0x000000ffu) | (i.y & 0xffffff00u);
@@ -870,7 +871,7 @@ FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i)
{
return ((i.x >> 16) & 0x000000ffu) | (i.y & 0xffffff00u);
@@ -886,7 +887,7 @@ FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i)
{
return ((i.x << 8) & 0x0000ff00u) | (i.y & 0xffff00ffu);
@@ -902,7 +903,7 @@ FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i)
{
return ((i.x >> 8) & 0x0000ff00u) | (i.y & 0xffff00ffu);
@@ -918,7 +919,7 @@ FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i)
{
return ((i.x << 16) & 0x00ff0000u) | (i.y & 0xff00ffffu);
@@ -934,7 +935,7 @@ FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i)
{
return ((i.x) & 0x00ff0000u) | (i.y & 0xff00ffffu);
@@ -950,7 +951,7 @@ FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i)
{
return ((i.x << 24) & 0xff000000u) | (i.y & 0x00ffffffu);
@@ -966,7 +967,7 @@ FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i)
{
return ((i.x << 8) & 0xff000000u) | (i.y & 0x00ffffffu);
@@ -982,7 +983,7 @@ FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i)
{
return ((i.x) & 0x00ff00ffu) | ((i.y << 8) & 0xff00ff00u);
@@ -998,7 +999,7 @@ FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i)
/// @returns
/// The packed integer value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i)
{
return (((i.x) & 0xffu) | ((i.x >> 8) & 0xff00u) | ((i.y << 16) & 0xff0000u) | ((i.y << 8) & 0xff000000u));
@@ -1012,7 +1013,7 @@ FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i)
/// @returns
-/// The packed FfxUInt32x2 value.
+/// The packed FfxUInt16x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
{
x *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
@@ -1037,7 +1038,7 @@ FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0)));
@@ -1061,7 +1062,7 @@ FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0)));
@@ -1085,7 +1086,7 @@ FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0)));
@@ -1109,7 +1110,7 @@ FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0)));
@@ -1123,7 +1124,7 @@ FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0);
@@ -1136,7 +1137,7 @@ FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0);
@@ -1149,7 +1150,7 @@ FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0);
@@ -1162,7 +1163,7 @@ FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0);
@@ -1176,7 +1177,7 @@ FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
-/// The packed FfxUInt32x2 value.
+/// The packed FfxUInt16x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
{
x = x * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
@@ -1195,7 +1196,7 @@ FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0)));
@@ -1213,7 +1214,7 @@ FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0)));
@@ -1231,7 +1232,7 @@ FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0)));
@@ -1249,7 +1250,7 @@ FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0)));
@@ -1270,7 +1271,7 @@ FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u;
@@ -1291,7 +1292,7 @@ FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u;
@@ -1312,7 +1313,7 @@ FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u;
@@ -1333,7 +1334,7 @@ FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x
/// @returns
/// The packed FfxUInt32x2 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u;
@@ -1349,7 +1350,7 @@ FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
@@ -1364,7 +1365,7 @@ FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
@@ -1379,7 +1380,7 @@ FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
@@ -1394,7 +1395,7 @@ FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
@@ -1409,7 +1410,7 @@ FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
@@ -1424,7 +1425,7 @@ FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
@@ -1439,7 +1440,7 @@ FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
@@ -1454,7 +1455,7 @@ FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
/// The unpacked FfxFloat16x2.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
{
return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
@@ -1473,7 +1474,7 @@ FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
/// @returns
/// An approximation of the square root, estimated to low quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a)
{
return FFX_TO_FLOAT16((FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)) + FFX_BROADCAST_UINT16(0x1de2));
@@ -1492,7 +1493,7 @@ FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a)
/// @returns
/// An approximation of the square root, estimated to low quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a)
{
return FFX_TO_FLOAT16X2((FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)) + FFX_BROADCAST_UINT16X2(0x1de2));
@@ -1511,7 +1512,7 @@ FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a)
/// @returns
/// An approximation of the square root, estimated to low quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a)
{
return FFX_TO_FLOAT16X3((FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)) + FFX_BROADCAST_UINT16X3(0x1de2));
@@ -1530,7 +1531,7 @@ FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a)
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a)
{
return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x7784) - FFX_TO_UINT16(a));
@@ -1549,7 +1550,7 @@ FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a)
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a)
{
return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x7784) - FFX_TO_UINT16X2(a));
@@ -1568,7 +1569,7 @@ FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a)
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a)
{
return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x7784) - FFX_TO_UINT16X3(a));
@@ -1587,7 +1588,7 @@ FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a)
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a)
{
return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x7784) - FFX_TO_UINT16X4(a));
@@ -1606,7 +1607,7 @@ FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a)
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a)
{
FfxFloat16 b = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x778d) - FFX_TO_UINT16(a));
@@ -1626,7 +1627,7 @@ FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a)
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a)
{
FfxFloat16x2 b = FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x778d) - FFX_TO_UINT16X2(a));
@@ -1646,7 +1647,7 @@ FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a)
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a)
{
FfxFloat16x3 b = FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x778d) - FFX_TO_UINT16X3(a));
@@ -1666,7 +1667,7 @@ FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a)
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a)
{
FfxFloat16x4 b = FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x778d) - FFX_TO_UINT16X4(a));
@@ -1686,7 +1687,7 @@ FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a)
/// @returns
/// An approximation of the reciprocal of the square root, estimated to low quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a)
{
return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x59a3) - (FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)));
@@ -1705,7 +1706,7 @@ FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a)
/// @returns
/// An approximation of the reciprocal of the square root, estimated to low quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a)
{
return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x59a3) - (FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)));
@@ -1724,7 +1725,7 @@ FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a)
/// @returns
/// An approximation of the reciprocal of the square root, estimated to low quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a)
{
return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x59a3) - (FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)));
@@ -1743,7 +1744,7 @@ FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a)
/// @returns
/// An approximation of the reciprocal of the square root, estimated to low quality.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a)
{
return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x59a3) - (FFX_TO_UINT16X4(a) >> FFX_BROADCAST_UINT16X4(1)));
@@ -1833,7 +1834,7 @@ FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y)
{
return min(x, y);
@@ -1847,7 +1848,7 @@ FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y)
{
return min(x, y);
@@ -1861,7 +1862,7 @@ FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y)
{
return min(x, y);
@@ -1875,7 +1876,7 @@ FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y)
{
return min(x, y);
@@ -1889,7 +1890,7 @@ FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y)
/// @returns
/// Result of the NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x)
{
return x ^ FFX_BROADCAST_UINT16(1);
@@ -1903,7 +1904,7 @@ FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x)
/// @returns
/// Result of the NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x)
{
return x ^ FFX_BROADCAST_UINT16X2(1);
@@ -1917,7 +1918,7 @@ FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x)
/// @returns
/// Result of the NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x)
{
return x ^ FFX_BROADCAST_UINT16X3(1);
@@ -1931,7 +1932,7 @@ FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x)
/// @returns
/// Result of the NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x)
{
return x ^ FFX_BROADCAST_UINT16X4(1);
@@ -1945,7 +1946,7 @@ FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y)
{
return max(x, y);
@@ -1959,7 +1960,7 @@ FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y)
{
return max(x, y);
@@ -1973,7 +1974,7 @@ FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y)
{
return max(x, y);
@@ -1987,7 +1988,7 @@ FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y)
{
return max(x, y);
@@ -2000,7 +2001,7 @@ FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y)
/// @returns
/// The converted Uint value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x)
{
return FFX_TO_UINT16(x * FFX_TO_FLOAT16(FFX_TO_UINT16(1)));
@@ -2013,7 +2014,7 @@ FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x)
/// @returns
/// The converted Uint value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x)
{
return FFX_TO_UINT16X2(x * FFX_TO_FLOAT16X2(FfxUInt16x2(1, 1)));
@@ -2026,7 +2027,7 @@ FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x)
/// @returns
/// The converted Uint value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x)
{
return FFX_TO_UINT16X3(x * FFX_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1)));
@@ -2039,7 +2040,7 @@ FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x)
/// @returns
/// The converted Uint value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x)
{
return FFX_TO_UINT16X4(x * FFX_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1)));
@@ -2052,7 +2053,7 @@ FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x)
/// @returns
/// The converted half-precision FfxFloat32 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x)
{
return FFX_TO_FLOAT16(x * FFX_TO_UINT16(FFX_TO_FLOAT16(1.0)));
@@ -2065,7 +2066,7 @@ FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x)
/// @returns
/// The converted half-precision FfxFloat32 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x)
{
return FFX_TO_FLOAT16X2(x * FFX_TO_UINT16X2(FfxUInt16x2(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0))));
@@ -2078,7 +2079,7 @@ FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x)
/// @returns
/// The converted half-precision FfxFloat32 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x)
{
return FFX_TO_FLOAT16X3(x * FFX_TO_UINT16X3(FfxUInt16x3(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0))));
@@ -2091,7 +2092,7 @@ FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x)
/// @returns
/// The converted half-precision FfxFloat32 value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x)
{
return FFX_TO_FLOAT16X4(x * FFX_TO_UINT16X4(FfxUInt16x4(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0))));
@@ -2105,7 +2106,7 @@ FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y)
{
return min(x, y);
@@ -2119,7 +2120,7 @@ FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y)
{
return min(x, y);
@@ -2133,7 +2134,7 @@ FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y)
{
return min(x, y);
@@ -2147,7 +2148,7 @@ FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y)
/// @returns
/// Result of the AND operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y)
{
return min(x, y);
@@ -2161,7 +2162,7 @@ FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y)
/// @returns
/// Result of the AND NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y)
{
return (-x) * y + FFX_BROADCAST_FLOAT16(1.0);
@@ -2175,7 +2176,7 @@ FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y)
/// @returns
/// Result of the AND NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
{
return (-x) * y + FFX_BROADCAST_FLOAT16X2(1.0);
@@ -2189,7 +2190,7 @@ FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
/// @returns
/// Result of the AND NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
{
return (-x) * y + FFX_BROADCAST_FLOAT16X3(1.0);
@@ -2203,7 +2204,7 @@ FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
/// @returns
/// Result of the AND NOT operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
{
return (-x) * y + FFX_BROADCAST_FLOAT16X4(1.0);
@@ -2219,7 +2220,7 @@ FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
{
return ffxSaturate(x * y + z);
@@ -2235,7 +2236,7 @@ FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
{
return ffxSaturate(x * y + z);
@@ -2251,7 +2252,7 @@ FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
{
return ffxSaturate(x * y + z);
@@ -2267,7 +2268,7 @@ FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
{
return ffxSaturate(x * y + z);
@@ -2280,7 +2281,7 @@ FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
/// @returns
/// Result of the greater than zero comparison.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x)
{
return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF));
@@ -2293,7 +2294,7 @@ FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x)
/// @returns
/// Result of the greater than zero comparison.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x)
{
return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF));
@@ -2306,7 +2307,7 @@ FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x)
/// @returns
/// Result of the greater than zero comparison.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x)
{
return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF));
@@ -2319,7 +2320,7 @@ FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x)
/// @returns
/// Result of the greater than zero comparison.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x)
{
return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF));
@@ -2332,7 +2333,7 @@ FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x)
{
return FFX_BROADCAST_FLOAT16(1.0) - x;
@@ -2345,7 +2346,7 @@ FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x)
{
return FFX_BROADCAST_FLOAT16X2(1.0) - x;
@@ -2358,7 +2359,7 @@ FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x)
{
return FFX_BROADCAST_FLOAT16X3(1.0) - x;
@@ -2371,7 +2372,7 @@ FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x)
/// @returns
/// Result of the AND OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x)
{
return FFX_BROADCAST_FLOAT16X4(1.0) - x;
@@ -2385,7 +2386,7 @@ FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y)
{
return max(x, y);
@@ -2399,7 +2400,7 @@ FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
{
return max(x, y);
@@ -2413,7 +2414,7 @@ FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
{
return max(x, y);
@@ -2427,7 +2428,7 @@ FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
/// @returns
/// Result of the OR operation.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
{
return max(x, y);
@@ -2442,7 +2443,7 @@ FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
/// @returns
/// The selected value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
{
FfxFloat16 r = (-x) * z + z;
@@ -2458,7 +2459,7 @@ FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
/// @returns
/// The selected value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
{
FfxFloat16x2 r = (-x) * z + z;
@@ -2474,7 +2475,7 @@ FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z
/// @returns
/// The selected value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
{
FfxFloat16x3 r = (-x) * z + z;
@@ -2490,7 +2491,7 @@ FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z
/// @returns
/// The selected value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
{
FfxFloat16x4 r = (-x) * z + z;
@@ -2504,7 +2505,7 @@ FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z
/// @returns
/// Result of the sign value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x)
{
return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF));
@@ -2517,7 +2518,7 @@ FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x)
/// @returns
/// Result of the sign value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x)
{
return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF));
@@ -2530,7 +2531,7 @@ FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x)
/// @returns
/// Result of the sign value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x)
{
return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF));
@@ -2543,7 +2544,7 @@ FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x)
/// @returns
/// Result of the sign value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x)
{
return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF));
@@ -2562,7 +2563,7 @@ FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x)
/// @returns
/// The color in Rec.709 space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c)
{
FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
@@ -2583,7 +2584,7 @@ FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c)
/// @returns
/// The color in Rec.709 space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c)
{
FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
@@ -2604,7 +2605,7 @@ FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c)
/// @returns
/// The color in Rec.709 space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c)
{
FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
@@ -2624,7 +2625,7 @@ FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c)
/// @returns
/// A value in gamma space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX)
{
return pow(c, FFX_BROADCAST_FLOAT16(rcpX));
@@ -2642,7 +2643,7 @@ FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX)
/// @returns
/// A value in gamma space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX)
{
return pow(c, FFX_BROADCAST_FLOAT16X2(rcpX));
@@ -2660,7 +2661,7 @@ FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX)
/// @returns
/// A value in gamma space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX)
{
return pow(c, FFX_BROADCAST_FLOAT16X3(rcpX));
@@ -2673,7 +2674,7 @@ FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX)
/// @returns
/// A value in SRGB space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c)
{
FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
@@ -2688,7 +2689,7 @@ FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c)
/// @returns
/// A value in SRGB space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c)
{
FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
@@ -2703,7 +2704,7 @@ FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c)
/// @returns
/// A value in SRGB space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c)
{
FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
@@ -2718,7 +2719,7 @@ FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c)
/// @returns
/// A square root of the input value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxSquareRootHalf(FfxFloat16 c)
{
return sqrt(c);
@@ -2731,7 +2732,7 @@ FfxFloat16 ffxSquareRootHalf(FfxFloat16 c)
/// @returns
/// A square root of the input value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c)
{
return sqrt(c);
@@ -2744,7 +2745,7 @@ FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c)
/// @returns
/// A square root of the input value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c)
{
return sqrt(c);
@@ -2757,7 +2758,7 @@ FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c)
/// @returns
/// A cube root of the input value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxCubeRootHalf(FfxFloat16 c)
{
return pow(c, FFX_BROADCAST_FLOAT16(1.0 / 3.0));
@@ -2770,7 +2771,7 @@ FfxFloat16 ffxCubeRootHalf(FfxFloat16 c)
/// @returns
/// A cube root of the input value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c)
{
return pow(c, FFX_BROADCAST_FLOAT16X2(1.0 / 3.0));
@@ -2783,7 +2784,7 @@ FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c)
/// @returns
/// A cube root of the input value.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c)
{
return pow(c, FFX_BROADCAST_FLOAT16X3(1.0 / 3.0));
@@ -2796,7 +2797,7 @@ FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c)
{
FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
@@ -2811,7 +2812,7 @@ FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c)
{
FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
@@ -2826,7 +2827,7 @@ FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c)
{
FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
@@ -2844,7 +2845,7 @@ FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x)
{
return pow(c, FFX_BROADCAST_FLOAT16(x));
@@ -2860,7 +2861,7 @@ FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x)
{
return pow(c, FFX_BROADCAST_FLOAT16X2(x));
@@ -2876,7 +2877,7 @@ FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x)
{
return pow(c, FFX_BROADCAST_FLOAT16X3(x));
@@ -2891,10 +2892,10 @@ FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
{
- FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+ FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z));
}
@@ -2908,10 +2909,10 @@ FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
{
- FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+ FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz));
}
@@ -2925,30 +2926,32 @@ FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
/// @returns
/// A value in linear space.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c)
{
- FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+ FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz));
}
/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear.
///
-/// 543210
-/// ======
-/// ..xxx.
-/// yy...y
+/// Remap illustration:
+///
+/// 543210
+/// ~~~~~~
+/// ..xxx.
+/// yy...y
///
/// @param [in] a The input 1D coordinates to remap.
///
/// @returns
/// The remapped 2D coordinates.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a)
{
- return FfxUInt16x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u));
+ return FfxUInt16x2(ffxBitfieldExtract(a, 1u, 3u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), a, 1u));
}
/// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions.
@@ -2969,10 +2972,10 @@ FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a)
/// @returns
/// The remapped 2D coordinates.
///
-/// @ingroup GPU
+/// @ingroup GPUCore
FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a)
{
- return FfxUInt16x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u));
+ return FfxUInt16x2(ffxBitfieldInsertMask(ffxBitfieldExtract(a, 2u, 3u), a, 1u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), ffxBitfieldExtract(a, 1u, 2u), 2u));
}
#endif // FFX_HALF
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h b/thirdparty/amd-ffx/gpu/ffx_core_hlsl.h
similarity index 75%
rename from thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
rename to thirdparty/amd-ffx/gpu/ffx_core_hlsl.h
index ad4ff6552d16..28827d98652f 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
+++ b/thirdparty/amd-ffx/gpu/ffx_core_hlsl.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -19,191 +20,274 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
+/// @defgroup HLSLCore HLSL Core
+/// HLSL core defines and functions
+///
+/// @ingroup FfxHLSL
+
+#define DECLARE_SRV_REGISTER(regIndex) t##regIndex
+#define DECLARE_UAV_REGISTER(regIndex) u##regIndex
+#define DECLARE_CB_REGISTER(regIndex) b##regIndex
+#define FFX_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex))
+#define FFX_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex))
+#define FFX_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex))
+
+/// A define for abstracting select functionality for pre/post HLSL 2021.
+///
+/// @ingroup HLSLCore
+#if __HLSL_VERSION >= 2021
+
+#define FFX_SELECT(cond, arg1, arg2) select(cond, arg1, arg2)
+
+#else // #if __HLSL_VERSION >= 2021
+// Fully parenthesized so the ternary stays one expression at the call site, like select().
+#define FFX_SELECT(cond, arg1, arg2) ((cond) ? (arg1) : (arg2))
+
+#endif // #if __HLSL_VERSION >= 2021
+
/// A define for abstracting shared memory between shading languages.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_GROUPSHARED groupshared
/// A define for abstracting compute memory barriers between shading languages.
///
-/// @ingroup GPU
-#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync
+/// @ingroup HLSLCore
+#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync()
+
+/// A define for abstracting compute atomic additions between shading languages.
+///
+/// @ingroup HLSLCore
+#define FFX_ATOMIC_ADD(x, y) InterlockedAdd(x, y)
+
+/// A define for abstracting compute atomic additions that also return the original value between shading languages.
+///
+/// @ingroup HLSLCore
+#define FFX_ATOMIC_ADD_RETURN(x, y, r) InterlockedAdd(x, y, r)
+
+/// A define for abstracting compute atomic OR between shading languages.
+///
+/// @ingroup HLSLCore
+#define FFX_ATOMIC_OR(x, y) InterlockedOr(x, y)
+
+/// A define for abstracting compute atomic min between shading languages.
+///
+/// @ingroup HLSLCore
+#define FFX_ATOMIC_MIN(x, y) InterlockedMin(x, y)
+
+/// A define for abstracting compute atomic max between shading languages.
+///
+/// @ingroup HLSLCore
+#define FFX_ATOMIC_MAX(x, y) InterlockedMax(x, y)
/// A define added to accept static markup on functions to aid CPU/GPU portability of code.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_STATIC static
/// A define for abstracting loop unrolling between shading languages.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_UNROLL [unroll]
/// A define for abstracting a 'greater than' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_GREATER_THAN(x, y) x > y
/// A define for abstracting a 'greater than or equal' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_GREATER_THAN_EQUAL(x, y) x >= y
/// A define for abstracting a 'less than' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_LESS_THAN(x, y) x < y
/// A define for abstracting a 'less than or equal' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_LESS_THAN_EQUAL(x, y) x <= y
/// A define for abstracting an 'equal' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_EQUAL(x, y) x == y
/// A define for abstracting a 'not equal' comparison operator between two types.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_NOT_EQUAL(x, y) x != y
+/// A define for abstracting matrix multiply operations between shading languages.
+///
+/// @ingroup HLSLCore
+#define FFX_MATRIX_MULTIPLY(a, b) mul(a, b)
+
+/// A define for abstracting vector transformations between shading languages.
+///
+/// @ingroup HLSLCore
+#define FFX_TRANSFORM_VECTOR(a, b) mul(a, b)
+
+/// A define for abstracting modulo operations between shading languages.
+///
+/// @ingroup HLSLCore
+#define FFX_MODULO(a, b) (fmod(a, b))
+
/// Broadcast a scalar value to a 1-dimensional floating point vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x)
/// Broadcast a scalar value to a 2-dimensional floating point vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x)
/// Broadcast a scalar value to a 3-dimensional floating point vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x)
/// Broadcast a scalar value to a 4-dimensional floating point vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x)
/// Broadcast a scalar value to a 1-dimensional unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_UINT32(x) FfxUInt32(x)
/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x)
/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x)
/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x)
/// Broadcast a scalar value to a 1-dimensional signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_INT32(x) FfxInt32(x)
/// Broadcast a scalar value to a 2-dimensional signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_INT32X2(x) FfxInt32(x)
/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_INT32X3(x) FfxInt32(x)
/// Broadcast a scalar value to a 4-dimensional signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_INT32X4(x) FfxInt32(x)
/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_FLOAT16(a) FFX_MIN16_F(a)
/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a)
/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a)
/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a)
/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_UINT16(a) FFX_MIN16_U(a)
/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a)
/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a)
/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a)
/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_INT16(a) FFX_MIN16_I(a)
/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a)
/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a)
/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector.
///
-/// @ingroup GPU
+/// @ingroup HLSLCore
#define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a)
-/// Pack 2x32-bit floating point values in a single 32bit value.
+/// Convert FfxFloat32 to half (in lower 16-bits of output).
+///
+/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
+///
+/// The function supports denormals.
+///
+/// Some conversion rules are to make computations possibly "safer" on the GPU,
+/// -INF & -NaN -> -65504
+/// +INF & +NaN -> +65504
+///
+/// @param [in] f The 32bit floating point value to convert.
+///
+/// @returns
+/// The closest 16bit floating point value to f.
///
+/// @ingroup HLSLCore
+#define ffxF32ToF16 f32tof16
+
+/// Pack 2x32-bit floating point values in a single 32bit value.
+///
/// This function first converts each component of value into their nearest 16-bit floating
/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the
/// 32bit unsigned integer respectively.
///
/// @param [in] value A 2-dimensional floating point value to convert and pack.
-///
+///
/// @returns
/// A packed 32bit value containing 2 16bit floating point values.
-///
-/// @ingroup HLSL
-FfxUInt32 packHalf2x16(FfxFloat32x2 value)
+///
+/// @ingroup HLSLCore
+FfxUInt32 ffxPackHalf2x16(FfxFloat32x2 value)
{
- return f32tof16(value.x) | (f32tof16(value.y) << 16);
+ return ffxF32ToF16(value.x) | (ffxF32ToF16(value.y) << 16);
}
/// Broadcast a scalar value to a 2-dimensional floating point vector.
@@ -213,7 +297,7 @@ FfxUInt32 packHalf2x16(FfxFloat32x2 value)
/// @returns
/// A 2-dimensional floating point vector with value in each component.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
{
return FfxFloat32x2(value, value);
@@ -226,7 +310,7 @@ FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
/// @returns
/// A 3-dimensional floating point vector with value in each component.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
{
return FfxFloat32x3(value, value, value);
@@ -239,7 +323,7 @@ FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
/// @returns
/// A 4-dimensional floating point vector with value in each component.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
{
return FfxFloat32x4(value, value, value, value);
@@ -252,7 +336,7 @@ FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
/// @returns
/// A 2-dimensional signed integer vector with value in each component.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxInt32x2 ffxBroadcast2(FfxInt32 value)
{
return FfxInt32x2(value, value);
@@ -265,10 +349,10 @@ FfxInt32x2 ffxBroadcast2(FfxInt32 value)
/// @returns
/// A 3-dimensional signed integer vector with value in each component.
///
-/// @ingroup HLSL
-FfxUInt32x3 ffxBroadcast3(FfxInt32 value)
+/// @ingroup HLSLCore
+FfxInt32x3 ffxBroadcast3(FfxInt32 value)
{
- return FfxUInt32x3(value, value, value);
+ return FfxInt32x3(value, value, value);
}
/// Broadcast a scalar value to a 4-dimensional signed integer vector.
@@ -278,7 +362,7 @@ FfxUInt32x3 ffxBroadcast3(FfxInt32 value)
/// @returns
/// A 4-dimensional signed integer vector with value in each component.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxInt32x4 ffxBroadcast4(FfxInt32 value)
{
return FfxInt32x4(value, value, value, value);
@@ -291,7 +375,7 @@ FfxInt32x4 ffxBroadcast4(FfxInt32 value)
/// @returns
/// A 2-dimensional unsigned integer vector with value in each component.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
{
return FfxUInt32x2(value, value);
@@ -304,7 +388,7 @@ FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
/// @returns
/// A 3-dimensional unsigned integer vector with value in each component.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
{
return FfxUInt32x3(value, value, value);
@@ -317,24 +401,24 @@ FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
/// @returns
/// A 4-dimensional unsigned integer vector with value in each component.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
{
return FfxUInt32x4(value, value, value, value);
}
-FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
+FfxUInt32 ffxBitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
{
FfxUInt32 mask = (1u << bits) - 1;
return (src >> off) & mask;
}
-FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
+FfxUInt32 ffxBitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
{
return (ins & mask) | (src & (~mask));
}
-FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
+FfxUInt32 ffxBitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
{
FfxUInt32 mask = (1u << bits) - 1;
return (ins & mask) | (src & (~mask));
@@ -342,12 +426,12 @@ FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
/// Interprets the bit pattern of x as an unsigned integer.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32 ffxAsUInt32(FfxFloat32 x)
{
return asuint(x);
@@ -355,12 +439,12 @@ FfxUInt32 ffxAsUInt32(FfxFloat32 x)
/// Interprets the bit pattern of x as an unsigned integer.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
{
return asuint(x);
@@ -368,12 +452,12 @@ FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
/// Interprets the bit pattern of x as an unsigned integer.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
{
return asuint(x);
@@ -381,12 +465,12 @@ FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
/// Interprets the bit pattern of x as an unsigned integer.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
{
return asuint(x);
@@ -394,12 +478,12 @@ FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
/// Interprets the bit pattern of x as a floating-point number.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32 ffxAsFloat(FfxUInt32 x)
{
return asfloat(x);
@@ -407,12 +491,12 @@ FfxFloat32 ffxAsFloat(FfxUInt32 x)
/// Interprets the bit pattern of x as a floating-point number.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
{
return asfloat(x);
@@ -420,12 +504,12 @@ FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
/// Interprets the bit pattern of x as a floating-point number.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
{
return asfloat(x);
@@ -433,17 +517,121 @@ FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
/// Interprets the bit pattern of x as a floating-point number.
///
-/// @param [in] value The input value.
+/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
{
return asfloat(x);
}
+/// Compute the reciprocal (multiplicative inverse) of a value using the HLSL rcp intrinsic.
+///
+/// @param [in] x The value to calculate the reciprocal of.
+///
+/// @returns
+/// The reciprocal of x, as returned by rcp.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxReciprocal(FfxFloat32 x)
+{
+ return rcp(x);
+}
+
+/// Compute the reciprocal (multiplicative inverse) of a 2-component value using the HLSL rcp intrinsic.
+///
+/// @param [in] x The value to calculate the reciprocal of.
+///
+/// @returns
+/// The reciprocal of x, as returned by rcp.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxReciprocal(FfxFloat32x2 x)
+{
+ return rcp(x);
+}
+
+/// Compute the reciprocal (multiplicative inverse) of a 3-component value using the HLSL rcp intrinsic.
+///
+/// @param [in] x The value to calculate the reciprocal of.
+///
+/// @returns
+/// The reciprocal of x, as returned by rcp.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxReciprocal(FfxFloat32x3 x)
+{
+ return rcp(x);
+}
+
+/// Compute the reciprocal (multiplicative inverse) of a 4-component value using the HLSL rcp intrinsic.
+///
+/// @param [in] x The value to calculate the reciprocal of.
+///
+/// @returns
+/// The reciprocal of x, as returned by rcp.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxReciprocal(FfxFloat32x4 x)
+{
+ return rcp(x);
+}
+
+/// Compute the inverse square root of a value using the HLSL rsqrt intrinsic.
+///
+/// @param [in] x The value to calculate the inverse square root of.
+///
+/// @returns
+/// The inverse square root of x, as returned by rsqrt.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxRsqrt(FfxFloat32 x)
+{
+ return rsqrt(x);
+}
+
+/// Compute the inverse square root of a 2-component value using the HLSL rsqrt intrinsic.
+///
+/// @param [in] x The value to calculate the inverse square root of.
+///
+/// @returns
+/// The inverse square root of x, as returned by rsqrt.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxRsqrt(FfxFloat32x2 x)
+{
+ return rsqrt(x);
+}
+
+/// Compute the inverse square root of a 3-component value using the HLSL rsqrt intrinsic.
+///
+/// @param [in] x The value to calculate the inverse square root of.
+///
+/// @returns
+/// The inverse square root of x, as returned by rsqrt.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxRsqrt(FfxFloat32x3 x)
+{
+ return rsqrt(x);
+}
+
+/// Compute the inverse square root of a 4-component value using the HLSL rsqrt intrinsic.
+///
+/// @param [in] x The value to calculate the inverse square root of.
+///
+/// @returns
+/// The inverse square root of x, as returned by rsqrt.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxRsqrt(FfxFloat32x4 x)
+{
+ return rsqrt(x);
+}
+
/// Compute the linear interopation between two values.
///
/// Implemented by calling the HLSL mix instrinsic function. Implements the
@@ -458,7 +646,7 @@ FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
{
return lerp(x, y, t);
@@ -478,7 +666,7 @@ FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
{
return lerp(x, y, t);
@@ -498,7 +686,7 @@ FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
{
return lerp(x, y, t);
@@ -518,7 +706,7 @@ FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
{
return lerp(x, y, t);
@@ -538,7 +726,7 @@ FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
{
return lerp(x, y, t);
@@ -558,7 +746,7 @@ FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
{
return lerp(x, y, t);
@@ -578,7 +766,7 @@ FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
/// @returns
/// A linearly interpolated value between x and y according to t.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
{
return lerp(x, y, t);
@@ -591,7 +779,7 @@ FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
/// @returns
/// The clamped version of x.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32 ffxSaturate(FfxFloat32 x)
{
return saturate(x);
@@ -604,7 +792,7 @@ FfxFloat32 ffxSaturate(FfxFloat32 x)
/// @returns
/// The clamped version of x.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
{
return saturate(x);
@@ -617,7 +805,7 @@ FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
/// @returns
/// The clamped version of x.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
{
return saturate(x);
@@ -630,7 +818,7 @@ FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
/// @returns
/// The clamped version of x.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
{
return saturate(x);
@@ -645,11 +833,11 @@ FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
/// function.
///
/// @param [in] x The value to compute the fractional part from.
-///
+///
/// @returns
/// The fractional part of x.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32 ffxFract(FfxFloat32 x)
{
return x - floor(x);
@@ -668,7 +856,7 @@ FfxFloat32 ffxFract(FfxFloat32 x)
/// @returns
/// The fractional part of x.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x2 ffxFract(FfxFloat32x2 x)
{
return x - floor(x);
@@ -687,7 +875,7 @@ FfxFloat32x2 ffxFract(FfxFloat32x2 x)
/// @returns
/// The fractional part of x.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x3 ffxFract(FfxFloat32x3 x)
{
return x - floor(x);
@@ -698,7 +886,7 @@ FfxFloat32x3 ffxFract(FfxFloat32x3 x)
/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function.
///
/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is
-/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic
+/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic
/// function.
///
/// @param [in] x The value to compute the fractional part from.
@@ -706,24 +894,76 @@ FfxFloat32x3 ffxFract(FfxFloat32x3 x)
/// @returns
/// The fractional part of x.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x4 ffxFract(FfxFloat32x4 x)
{
return x - floor(x);
}
+/// Round a value to the nearest integer using the HLSL round intrinsic. A fractional part of exactly 0.5 rounds to the nearest even integer.
+///
+/// @param [in] x The value to be rounded.
+///
+/// @returns
+/// The nearest integer to x, or the nearest even integer when x is equidistant from two integers.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxRound(FfxFloat32 x)
+{
+ return round(x);
+}
+
+/// Round a 2-component value to the nearest integer using the HLSL round intrinsic. A fractional part of exactly 0.5 rounds to the nearest even integer.
+///
+/// @param [in] x The value to be rounded.
+///
+/// @returns
+/// The nearest integer to x, or the nearest even integer when x is equidistant from two integers.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxRound(FfxFloat32x2 x)
+{
+ return round(x);
+}
+
+/// Round a 3-component value to the nearest integer using the HLSL round intrinsic. A fractional part of exactly 0.5 rounds to the nearest even integer.
+///
+/// @param [in] x The value to be rounded.
+///
+/// @returns
+/// The nearest integer to x, or the nearest even integer when x is equidistant from two integers.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxRound(FfxFloat32x3 x)
+{
+ return round(x);
+}
+
+/// Round a 4-component value to the nearest integer using the HLSL round intrinsic. A fractional part of exactly 0.5 rounds to the nearest even integer.
+///
+/// @param [in] x The value to be rounded.
+///
+/// @returns
+/// The nearest integer to x, or the nearest even integer when x is equidistant from two integers.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxRound(FfxFloat32x4 x)
+{
+ return round(x);
+}
+
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware.
-///
+///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation.
/// @param [in] z The third value to include in the max calcuation.
-///
+///
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
return max(x, max(y, z));
@@ -732,7 +972,7 @@ FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware.
-///
+///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation.
/// @param [in] z The third value to include in the max calcuation.
@@ -740,7 +980,7 @@ FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
return max(x, max(y, z));
@@ -749,7 +989,7 @@ FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware.
-///
+///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation.
/// @param [in] z The third value to include in the max calcuation.
@@ -757,7 +997,7 @@ FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
return max(x, max(y, z));
@@ -774,7 +1014,7 @@ FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
return max(x, max(y, z));
@@ -791,7 +1031,7 @@ FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
return max(x, max(y, z));
@@ -808,7 +1048,7 @@ FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
return max(x, max(y, z));
@@ -825,7 +1065,7 @@ FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
return max(x, max(y, z));
@@ -842,7 +1082,7 @@ FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
/// @returns
/// The maximum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
return max(x, max(y, z));
@@ -859,7 +1099,7 @@ FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -876,7 +1116,7 @@ FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -893,7 +1133,7 @@ FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -910,7 +1150,7 @@ FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
/// @returns
/// The median value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
return max(min(x, y), min(max(x, y), z));
@@ -990,7 +1230,7 @@ FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
/// Compute the minimum of three values.
///
-/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware.
+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
@@ -999,7 +1239,7 @@ FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
return min(x, min(y, z));
@@ -1007,7 +1247,7 @@ FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
/// Compute the minimum of three values.
///
-/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware.
+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
@@ -1016,7 +1256,7 @@ FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
return min(x, min(y, z));
@@ -1024,7 +1264,7 @@ FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
/// Compute the minimum of three values.
///
-/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware.
+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
@@ -1033,7 +1273,7 @@ FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
return min(x, min(y, z));
@@ -1041,7 +1281,7 @@ FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
/// Compute the minimum of three values.
///
-/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
@@ -1050,7 +1290,7 @@ FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
return min(x, min(y, z));
@@ -1058,7 +1298,7 @@ FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
/// Compute the minimum of three values.
///
-/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
@@ -1067,7 +1307,7 @@ FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
return min(x, min(y, z));
@@ -1075,7 +1315,7 @@ FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
/// Compute the minimum of three values.
///
-/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
@@ -1084,7 +1324,7 @@ FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
return min(x, min(y, z));
@@ -1092,16 +1332,16 @@ FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
/// Compute the minimum of three values.
///
-/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
-/// @param [in] y The second value to include in the min calcuation.
-/// @param [in] z The third value to include in the min calcuation.
+/// @param [in] y The second value to include in the min calculation.
+/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
return min(x, min(y, z));
@@ -1109,7 +1349,7 @@ FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
/// Compute the minimum of three values.
///
-/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
@@ -1118,23 +1358,38 @@ FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
/// @returns
/// The minimum value of x, y, and z.
///
-/// @ingroup HLSL
+/// @ingroup HLSLCore
FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
return min(x, min(y, z));
}
-FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
+FfxUInt32 ffxAShrSU1(FfxUInt32 a, FfxUInt32 b)
{
return FfxUInt32(FfxInt32(a) >> FfxInt32(b));
}
+FfxUInt32 ffxPackF32(FfxFloat32x2 v){ // Packs v into one 32-bit word from two ffxF32ToF16 results: v.x in the low bits, v.y shifted up by 16.
+ FfxUInt32x2 p = FfxUInt32x2(ffxF32ToF16(FfxFloat32x2(v).x), ffxF32ToF16(FfxFloat32x2(v).y));
+ return p.x | (p.y << 16); // assumes ffxF32ToF16 leaves the upper 16 bits of p.x clear -- TODO confirm
+}
+
+FfxFloat32x2 ffxUnpackF32(FfxUInt32 a){ // Splits a into its low 16 bits (-> .x) and high 16 bits (-> .y) and widens both with f16tof32.
+ return f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16));
+}
+
+FfxUInt32x2 ffxPackF32x2(FfxFloat32x4 v){ // Packs four floats into two words: v.xy -> .x and v.zw -> .y, each through ffxPackF32.
+ return FfxUInt32x2(ffxPackF32(v.xy), ffxPackF32(v.zw));
+}
+
+FfxFloat32x4 ffxUnpackF32x2(FfxUInt32x2 a){ // Expands two packed words into four floats: a.x -> .xy and a.y -> .zw, each through ffxUnpackF32.
+ return FfxFloat32x4(ffxUnpackF32(a.x), ffxUnpackF32(a.y));
+}
+
//==============================================================================================================================
// HLSL HALF
//==============================================================================================================================
-#if FFX_HALF
-
//==============================================================================================================================
// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).
// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/
@@ -1156,14 +1411,71 @@ FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x)
{
return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y));
}
+
+FfxUInt32x2 ffxFloat16x4ToUint32x2(FFX_MIN16_F4 v) // Packs the four components of v, converted with ffxF32ToF16, two per 32-bit word.
+{
+ FfxUInt32x2 result;
+ result.x = ffxF32ToF16(v.x) | (ffxF32ToF16(v.y) << 16); // x in the low half, y shifted into the high half
+ result.y = ffxF32ToF16(v.z) | (ffxF32ToF16(v.w) << 16); // z in the low half, w shifted into the high half
+ return result;
+}
+
+/// @brief Branch-free inverse of the value that avoids division by zero. If the value is zero, zero is returned.
+/// @param v Value to invert.
+/// @return If v = 0 returns 0 (numerator 0 over denominator -1). If v != 0 returns 1/v.
+FfxFloat32 ffxInvertSafe(FfxFloat32 v){
+ FfxFloat32 s = FfxFloat32(sign(v)); // -1, 0 or +1
+ FfxFloat32 s2 = s*s; // 1 when v != 0, 0 when v == 0
+ return s2/(v + (s2 - 1.0)); // (s2 - 1.0) is exactly 0 or -1; grouping it keeps a tiny |v| from being rounded away by "v + 1" (which made the divisor 0).
+}
+
+/// @brief Branch-free per-component inverse that avoids division by zero. If a component is zero, zero is returned for it.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0 (numerator 0 over denominator -1). If v != 0 returns 1/v.
+FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){
+ FfxFloat32x2 s = FfxFloat32x2(sign(v)); // -1, 0 or +1 per component
+ FfxFloat32x2 s2 = s*s; // 1 where v != 0, 0 where v == 0
+ return s2/(v + (s2 - FfxFloat32x2(1.0, 1.0))); // the grouped term is exactly 0 or -1, so a tiny |v| is not rounded away by "v + 1" (which made the divisor 0).
+}
+
+/// @brief Branch-free per-component inverse that avoids division by zero. If a component is zero, zero is returned for it.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0 (numerator 0 over denominator -1). If v != 0 returns 1/v.
+FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){
+ FfxFloat32x3 s = FfxFloat32x3(sign(v)); // -1, 0 or +1 per component
+ FfxFloat32x3 s2 = s*s; // 1 where v != 0, 0 where v == 0
+ return s2/(v + (s2 - FfxFloat32x3(1.0, 1.0, 1.0))); // the grouped term is exactly 0 or -1, so a tiny |v| is not rounded away by "v + 1" (which made the divisor 0).
+}
+
+/// @brief Branch-free per-component inverse that avoids division by zero. If a component is zero, zero is returned for it.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0 (numerator 0 over denominator -1). If v != 0 returns 1/v.
+FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){
+ FfxFloat32x4 s = FfxFloat32x4(sign(v)); // -1, 0 or +1 per component
+ FfxFloat32x4 s2 = s*s; // 1 where v != 0, 0 where v == 0
+ return s2/(v + (s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0))); // the grouped term is exactly 0 or -1, so a tiny |v| is not rounded away by "v + 1" (which made the divisor 0).
+}
+
#define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x))
+#if FFX_HALF
+
#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))
#define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x))
#define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x))
+
+FfxUInt32 ffxPackF16(FfxFloat16x2 v){ // Widens v to FfxFloat32x2, converts each component with ffxF32ToF16 and packs them: x low, y shifted up by 16.
+ FfxUInt32x2 p = FfxUInt32x2(ffxF32ToF16(FfxFloat32x2(v).x), ffxF32ToF16(FfxFloat32x2(v).y));
+ return p.x | (p.y << 16); // assumes ffxF32ToF16 leaves the upper 16 bits of p.x clear -- TODO confirm
+}
+
+FfxFloat16x2 ffxUnpackF16(FfxUInt32 a){ // Splits a into low/high 16-bit halves, widens both with f16tof32, then narrows the pair to FfxFloat16x2.
+ return FfxFloat16x2(f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16)));
+}
+
//------------------------------------------------------------------------------------------------------------------------------
FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x)
{
- return f32tof16(x.x) + (f32tof16(x.y) << 16);
+ return ffxF32ToF16(x.x) + (ffxF32ToF16(x.y) << 16);
}
FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x)
{
@@ -1182,19 +1494,19 @@ FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x)
#define FFX_UINT16X2_TO_UINT32(x) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(x))
#define FFX_UINT16X4_TO_UINT32X2(x) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(x))
-#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
+#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST)
#define FFX_TO_UINT16(x) asuint16(x)
#define FFX_TO_UINT16X2(x) asuint16(x)
#define FFX_TO_UINT16X3(x) asuint16(x)
#define FFX_TO_UINT16X4(x) asuint16(x)
#else
-#define FFX_TO_UINT16(a) FFX_MIN16_U(f32tof16(FfxFloat32(a)))
+#define FFX_TO_UINT16(a) FFX_MIN16_U(ffxF32ToF16(FfxFloat32(a)))
#define FFX_TO_UINT16X2(a) FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y))
#define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z))
#define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w))
-#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
+#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST)
-#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
+#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST)
#define FFX_TO_FLOAT16(x) asfloat16(x)
#define FFX_TO_FLOAT16X2(x) asfloat16(x)
#define FFX_TO_FLOAT16X3(x) asfloat16(x)
@@ -1204,7 +1516,7 @@ FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x)
#define FFX_TO_FLOAT16X2(a) FFX_MIN16_F2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y))
#define FFX_TO_FLOAT16X3(a) FFX_MIN16_F3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z))
#define FFX_TO_FLOAT16X4(a) FFX_MIN16_F4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w))
-#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
+#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST)
//==============================================================================================================================
#define FFX_BROADCAST_FLOAT16(a) FFX_MIN16_F(a)
@@ -1448,38 +1760,122 @@ FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b)
//==============================================================================================================================
#if defined(FFX_WAVE)
// Where 'x' must be a compile time literal.
-FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
+FfxFloat32 ffxWaveXorF1(FfxFloat32 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
-FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
+FfxFloat32x2 ffxWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
-FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
+FfxFloat32x3 ffxWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
-FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
+FfxFloat32x4 ffxWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
-FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
+FfxUInt32 ffxWaveXorU1(FfxUInt32 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
-FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x)
+FfxUInt32x2 ffxWaveXorU1(FfxUInt32x2 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
-FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x)
+FfxUInt32x3 ffxWaveXorU1(FfxUInt32x3 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
-FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x)
+FfxUInt32x4 ffxWaveXorU1(FfxUInt32x4 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
+FfxBoolean ffxWaveIsFirstLane()
+{
+ return WaveIsFirstLane();
+}
+FfxUInt32 ffxWaveLaneIndex()
+{
+ return WaveGetLaneIndex();
+}
+FfxBoolean ffxWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x)
+{
+ return WaveReadLaneAt(v, x);
+}
+FfxUInt32 ffxWavePrefixCountBits(FfxBoolean v)
+{
+ return WavePrefixCountBits(v);
+}
+FfxUInt32 ffxWaveActiveCountBits(FfxBoolean v)
+{
+ return WaveActiveCountBits(v);
+}
+FfxUInt32 ffxWaveReadLaneFirstU1(FfxUInt32 v)
+{
+ return WaveReadLaneFirst(v);
+}
+FfxUInt32x2 ffxWaveReadLaneFirstU2(FfxUInt32x2 v)
+{
+ return WaveReadLaneFirst(v);
+}
+FfxBoolean ffxWaveReadLaneFirstB1(FfxBoolean v)
+{
+ return WaveReadLaneFirst(v);
+}
+FfxUInt32 ffxWaveOr(FfxUInt32 a)
+{
+ return WaveActiveBitOr(a);
+}
+FfxUInt32 ffxWaveMin(FfxUInt32 a)
+{
+ return WaveActiveMin(a);
+}
+FfxFloat32 ffxWaveMin(FfxFloat32 a)
+{
+ return WaveActiveMin(a);
+}
+FfxUInt32 ffxWaveMax(FfxUInt32 a)
+{
+ return WaveActiveMax(a);
+}
+FfxFloat32 ffxWaveMax(FfxFloat32 a)
+{
+ return WaveActiveMax(a);
+}
+FfxUInt32 ffxWaveSum(FfxUInt32 a)
+{
+ return WaveActiveSum(a);
+}
+FfxFloat32 ffxWaveSum(FfxFloat32 a)
+{
+ return WaveActiveSum(a);
+}
+FfxUInt32 ffxWaveLaneCount()
+{
+ return WaveGetLaneCount();
+}
+FfxBoolean ffxWaveAllTrue(FfxBoolean v)
+{
+ return WaveActiveAllTrue(v);
+}
+FfxFloat32 ffxQuadReadX(FfxFloat32 v)
+{
+ return QuadReadAcrossX(v);
+}
+FfxFloat32x2 ffxQuadReadX(FfxFloat32x2 v)
+{
+ return QuadReadAcrossX(v);
+}
+FfxFloat32 ffxQuadReadY(FfxFloat32 v)
+{
+ return QuadReadAcrossY(v);
+}
+FfxFloat32x2 ffxQuadReadY(FfxFloat32x2 v)
+{
+ return QuadReadAcrossY(v);
+}
#if FFX_HALF
FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
@@ -1496,7 +1892,7 @@ FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
}
FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
{
- return AW4_FFX_UINT32(WaveReadLaneAt(FFX_UINT32_AW4(v), WaveGetLaneIndex() ^ x));
+ return FFX_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFX_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x));
}
#endif // FFX_HALF
#endif // #if defined(FFX_WAVE)
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_portability.h b/thirdparty/amd-ffx/gpu/ffx_core_portability.h
similarity index 61%
rename from thirdparty/amd-fsr2/shaders/ffx_core_portability.h
rename to thirdparty/amd-ffx/gpu/ffx_core_portability.h
index 45be05973a84..12147b9a7bfe 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_core_portability.h
+++ b/thirdparty/amd-ffx/gpu/ffx_core_portability.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -19,32 +20,27 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
-FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+void ffxOpAAddOneF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
d = a + ffxBroadcast3(b);
- return d;
}
-FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
+void ffxOpACpyF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a)
{
d = a;
- return d;
}
-FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
+void ffxOpAMulF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
{
d = a * b;
- return d;
}
-FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+void ffxOpAMulOneF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
- d = a * ffxBroadcast3(b);
- return d;
+ d = a * b;
}
-FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
+void ffxOpARcpF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a)
{
- d = rcp(a);
- return d;
+ d = ffxReciprocal(a);
}
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation.h
new file mode 100644
index 000000000000..ccf2e23b87f7
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation.h
@@ -0,0 +1,187 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_H
+#define FFX_FRAMEINTERPOLATION_H
+
+struct InterpolationSourceColor // Result of one filtered backbuffer fetch; filled in by SampleTextureBilinear().
+{
+ FfxFloat32x3 fRaw; // Weight-normalised blend of the loaded backbuffer texels, with no colour conversion applied.
+ FfxFloat32x3 fLinear; // fRaw passed through RawRGBToLinear().
+ FfxFloat32 fBilinearWeightSum; // Sum of the weights of the taps that were accepted; 0 means no tap contributed.
+};
+
+InterpolationSourceColor NewInterpolationSourceColor() // Returns an InterpolationSourceColor with every member set to zero.
+{
+ InterpolationSourceColor c;
+ c.fRaw = FfxFloat32x3(0.0, 0.0, 0.0);
+ c.fLinear = FfxFloat32x3(0.0, 0.0, 0.0);
+ c.fBilinearWeightSum = 0.0;
+ return c;
+}
+
+InterpolationSourceColor SampleTextureBilinear(FfxBoolean isCurrent, FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxInt32x2 texSize) // Manual 4-tap bilinear fetch at fUv + fMotionVector from the current (isCurrent) or previous backbuffer.
+{
+ InterpolationSourceColor result = NewInterpolationSourceColor();
+
+ FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
+ BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, texSize);
+
+ FfxFloat32x3 fColor = FfxFloat32x3(0.0, 0.0, 0.0);
+ FfxFloat32 fWeightSum = 0.0f;
+ for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { // one iteration per bilinear tap
+
+ const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+ const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+ if (IsInRect(iSamplePos, InterpolationRectBase(), InterpolationRectSize())) // taps outside the interpolation rect contribute neither colour nor weight
+ {
+ FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+
+ if (isCurrent)
+ fColor += LoadCurrentBackbuffer(iSamplePos).rgb * fWeight;
+ else
+ fColor += LoadPreviousBackbuffer(iSamplePos).rgb * fWeight;
+ fWeightSum += fWeight;
+ }
+ }
+
+ // Normalize by the accepted weights only; if every tap was rejected the colour stays zero.
+ fColor = (fWeightSum != 0.0f) ? fColor / fWeightSum : FfxFloat32x3(0.0f, 0.0f, 0.0f);
+
+ result.fRaw = fColor;
+ result.fLinear = RawRGBToLinear(fColor);
+ result.fBilinearWeightSum = fWeightSum; // callers compare this against 0 to detect a fetch with no accepted tap
+
+ return result;
+}
+
+void updateInPaintingWeight(inout FfxFloat32 fInPaintingWeight, FfxFloat32 fFactor) // In-place update of the inpainting weight from a new factor.
+{
+ fInPaintingWeight = ffxSaturate(ffxMax(fInPaintingWeight, fFactor)); // keep the larger of the current weight and fFactor, then saturate the result
+}
+
+void computeInterpolatedColor(FfxUInt32x2 iPxPos, out FfxFloat32x3 fInterpolatedColor, inout FfxFloat32 fInPaintingWeight)
+{
+ const FfxFloat32x2 fUvInInterpolationRect = (FfxFloat32x2(iPxPos - InterpolationRectBase()) + 0.5f) / InterpolationRectSize();
+ const FfxFloat32x2 fUvInScreenSpace = (FfxFloat32x2(iPxPos) + 0.5f) / DisplaySize();
+ const FfxFloat32x2 fLrUvInInterpolationRect = fUvInInterpolationRect * (FfxFloat32x2(RenderSize()) / GetMaxRenderSize());
+
+ const FfxFloat32x2 fUvLetterBoxScale = FfxFloat32x2(InterpolationRectSize()) / DisplaySize();
+
+ // game MV are top left aligned, the function scales them to render res UV
+ VectorFieldEntry gameMv;
+ LoadInpaintedGameFieldMv(fUvInInterpolationRect, gameMv);
+
+ // OF is done on the back buffers which already have black bars
+ VectorFieldEntry ofMv;
+ SampleOpticalFlowMotionVectorField(fUvInScreenSpace, ofMv);
+
+ // Binarize disocclusion factor
+ FfxFloat32x2 fDisocclusionFactor = FfxFloat32x2(FFX_EQUAL(ffxSaturate(SampleDisocclusionMask(fLrUvInInterpolationRect).xy), FfxFloat32x2(1.0, 1.0)));
+
+ InterpolationSourceColor fPrevColorGame = SampleTextureBilinear(false, fUvInScreenSpace, +gameMv.fMotionVector * fUvLetterBoxScale, DisplaySize()); // Get in previous frame buffer, the color of interpolated pixel
+ InterpolationSourceColor fCurrColorGame = SampleTextureBilinear(true, fUvInScreenSpace, -gameMv.fMotionVector * fUvLetterBoxScale, DisplaySize()); // Get color in current framebuffer, of color of interpolated pixel
+
+ InterpolationSourceColor fPrevColorOF = SampleTextureBilinear(false, fUvInScreenSpace, +ofMv.fMotionVector * fUvLetterBoxScale, DisplaySize());
+ InterpolationSourceColor fCurrColorOF = SampleTextureBilinear(true, fUvInScreenSpace, -ofMv.fMotionVector * fUvLetterBoxScale, DisplaySize());
+
+ FfxFloat32 fDisoccludedFactor = 0.0f;
+
+ // Disocclusion logic
+ {
+ fDisocclusionFactor.x *= FfxFloat32(!gameMv.bPosOutside); // fDisocclusionFactor.x of 1 means the pos of interpolated pixel is within bounds of previous frame.
+ fDisocclusionFactor.y *= FfxFloat32(!gameMv.bNegOutside); // fDisocclusionFactor.y of 1 means the pos of interpolated pixel is within bounds of current frame
+
+ // Inpaint in bi-directional disocclusion areas
+ updateInPaintingWeight(fInPaintingWeight, FfxFloat32(length(fDisocclusionFactor) <= FFX_FRAMEINTERPOLATION_EPSILON));
+
+ FfxFloat32 t = 0.5f;
+ t += 0.5f * (1 - (fDisocclusionFactor.x));
+ t -= 0.5f * (1 - (fDisocclusionFactor.y));
+ // Say if fDisocclusionFactor.x is 1 and fDisocclusionFactor.y = 0, then t will be 0. fInterpolatedColor will be entirely from fPrevColorGame
+ fInterpolatedColor = ffxLerp(fPrevColorGame.fRaw, fCurrColorGame.fRaw, ffxSaturate(t));
+ fDisoccludedFactor = ffxSaturate(1 - ffxMin(fDisocclusionFactor.x, fDisocclusionFactor.y));
+
+ if (fPrevColorGame.fBilinearWeightSum == 0.0f)
+ {
+ fInterpolatedColor = fCurrColorGame.fRaw;
+ }
+ else if (fCurrColorGame.fBilinearWeightSum == 0.0f)
+ {
+ fInterpolatedColor = fPrevColorGame.fRaw;
+ }
+ if (fPrevColorGame.fBilinearWeightSum == 0 && fCurrColorGame.fBilinearWeightSum == 0)
+ {
+ fInPaintingWeight = 1.0f;
+ }
+ }
+
+ {
+
+ FfxFloat32 ofT = 0.5f;
+
+ if (fPrevColorOF.fBilinearWeightSum > 0 && fCurrColorOF.fBilinearWeightSum > 0)
+ {
+ ofT = 0.5f;
+ }
+ else if (fPrevColorOF.fBilinearWeightSum > 0)
+ {
+ ofT = 0;
+ } else {
+ ofT = 1;
+ }
+
+ const FfxFloat32x3 ofColor = ffxLerp(fPrevColorOF.fRaw, fCurrColorOF.fRaw, ofT);
+
+ FfxFloat32 fOF_Sim = NormalizedDot3(fPrevColorOF.fRaw, fCurrColorOF.fRaw);
+ FfxFloat32 fGame_Sim = NormalizedDot3(fPrevColorGame.fRaw, fCurrColorGame.fRaw);
+
+ fGame_Sim = ffxLerp(ffxMax(FFX_FRAMEINTERPOLATION_EPSILON, fGame_Sim), 1.0f, ffxSaturate(fDisoccludedFactor));
+ FfxFloat32 fGameMvBias = ffxPow(ffxSaturate(fGame_Sim / ffxMax(FFX_FRAMEINTERPOLATION_EPSILON, fOF_Sim)), 1.0f);
+
+ const FfxFloat32 fFrameIndexFactor = FfxFloat32(FrameIndexSinceLastReset() < 10);
+ fGameMvBias = ffxLerp(fGameMvBias, 1.0f, fFrameIndexFactor);
+
+ fInterpolatedColor = ffxLerp(ofColor, fInterpolatedColor, ffxSaturate(fGameMvBias));
+ }
+}
+
+void computeFrameinterpolation(FfxInt32x2 iPxPos) // Per-pixel entry point: stores (colour, inpainting weight) for iPxPos through StoreFrameinterpolationOutput().
+{
+ FfxFloat32x3 fColor = FfxFloat32x3(0, 0, 0);
+ FfxFloat32 fInPaintingWeight = 0.0f; // stays 0 on the copy path below; only computeInterpolatedColor() can raise it
+
+ if (IsInRect(iPxPos, InterpolationRectBase(), InterpolationRectSize()) == false || FrameIndexSinceLastReset() == 0)
+ {
+ // If we just reset or we are outside the interpolation rect, copy the current back buffer and don't interpolate.
+ fColor = LoadCurrentBackbuffer(iPxPos);
+ }
+ else
+ {
+ computeInterpolatedColor(iPxPos, fColor, fInPaintingWeight);
+ }
+
+ StoreFrameinterpolationOutput(FfxInt32x2(iPxPos), FfxFloat32x4(fColor, fInPaintingWeight)); // rgb = colour, a = inpainting weight
+}
+
+#endif // FFX_FRAMEINTERPOLATION_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h
new file mode 100644
index 000000000000..20c5c296728f
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h
@@ -0,0 +1,758 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_frameinterpolation_resources.h"
+#include "ffx_core.h"
+
+#define COUNTER_SPD 0
+#define COUNTER_FRAME_INDEX_SINCE_LAST_RESET 1
+
+ ///////////////////////////////////////////////
+ // declare CBs and CB accessors
+///////////////////////////////////////////////
+#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION)
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION, std140) uniform cbFI_t
+ { // Each blank-line-separated group below adds up to one 16-byte std140 row.
+ FfxInt32x2 renderSize;
+ FfxInt32x2 displaySize;
+
+ FfxFloat32x2 displaySizeRcp;
+ FfxFloat32 cameraNear;
+ FfxFloat32 cameraFar;
+
+ FfxInt32x2 upscalerTargetSize;
+ FfxInt32 Mode;
+ FfxInt32 reset; // Reset() reports true when this equals 1
+
+ FfxFloat32x4 fDeviceToViewDepth;
+
+ FfxFloat32 deltaTime;
+ FfxInt32 HUDLessAttachedFactor;
+ FfxInt32x2 distortionFieldSize;
+
+ FfxFloat32x2 opticalFlowScale;
+ FfxInt32 opticalFlowBlockSize;
+ FfxUInt32 dispatchFlags;
+
+ FfxInt32x2 maxRenderSize;
+ FfxInt32 opticalFlowHalfResMode;
+ FfxInt32 NumInstances;
+
+ FfxInt32x2 interpolationRectBase;
+ FfxInt32x2 interpolationRectSize;
+
+ FfxFloat32x3 debugBarColor;
+ FfxUInt32 backBufferTransferFunction;
+
+ FfxFloat32x2 minMaxLuminance; // [0] is returned by MinLuminance(), [1] by MaxLuminance()
+ FfxFloat32 fTanHalfFOV;
+ FfxInt32 _pad1; // not read by any accessor in this section; presumably filler that completes the 16-byte row
+
+ FfxFloat32x2 fJitter;
+ FfxFloat32x2 fMotionVectorScale;
+ } cbFI;
+
+ FfxFloat32x2 Jitter()
+ {
+ return cbFI.fJitter;
+ }
+
+ FfxInt32x2 InterpolationRectBase()
+ {
+ return cbFI.interpolationRectBase;
+ }
+
+ FfxInt32x2 InterpolationRectSize()
+ {
+ return cbFI.interpolationRectSize;
+ }
+
+ FfxFloat32x2 MotionVectorScale()
+ {
+ return cbFI.fMotionVectorScale;
+ }
+
+ FfxInt32x2 RenderSize()
+ {
+ return cbFI.renderSize;
+ }
+
+ FfxInt32x2 DisplaySize()
+ {
+ return cbFI.displaySize;
+ }
+
+ FfxBoolean Reset()
+ {
+ return cbFI.reset == 1;
+ }
+
+ FfxFloat32x4 DeviceToViewSpaceTransformFactors()
+ {
+ return cbFI.fDeviceToViewDepth;
+ }
+
+ FfxInt32x2 GetOpticalFlowSize() // Returns (1 / opticalFlowScale) / opticalFlowBlockSize per axis, converted to integers.
+ {
+ FfxInt32x2 iOpticalFlowSize = FfxInt32x2((1.0 / cbFI.opticalFlowScale) / FfxFloat32x2(cbFI.opticalFlowBlockSize.xx)); // .xx replicates the scalar block size to both axes
+
+ return iOpticalFlowSize;
+ }
+
+ FfxInt32x2 GetOpticalFlowSize2() // Currently returns the same value as GetOpticalFlowSize(): the multiplier is 1.
+ {
+ return GetOpticalFlowSize() * 1;
+ }
+
+ FfxFloat32x2 GetOpticalFlowScale()
+ {
+ return cbFI.opticalFlowScale;
+ }
+
+ FfxInt32 GetOpticalFlowBlockSize()
+ {
+ return cbFI.opticalFlowBlockSize;
+ }
+
+ FfxInt32 GetHUDLessAttachedFactor()
+ {
+ return cbFI.HUDLessAttachedFactor;
+ }
+
+ FfxInt32x2 GetDistortionFieldSize()
+ {
+ return cbFI.distortionFieldSize;
+ }
+
+ FfxUInt32 GetDispatchFlags()
+ {
+ return cbFI.dispatchFlags;
+ }
+
+ FfxInt32x2 GetMaxRenderSize()
+ {
+ return cbFI.maxRenderSize;
+ }
+
+ FfxInt32 GetOpticalFlowHalfResMode()
+ {
+ return cbFI.opticalFlowHalfResMode;
+ }
+
+ FfxFloat32x3 GetDebugBarColor()
+ {
+ return cbFI.debugBarColor;
+ }
+
+ FfxFloat32 TanHalfFoV()
+ {
+ return cbFI.fTanHalfFOV;
+ }
+
+ FfxUInt32 BackBufferTransferFunction()
+ {
+ return cbFI.backBufferTransferFunction;
+ }
+
+ FfxFloat32 MinLuminance()
+ {
+ return cbFI.minMaxLuminance[0];
+ }
+
+ FfxFloat32 MaxLuminance()
+ {
+ return cbFI.minMaxLuminance[1];
+ }
+
+#endif // defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION)
+
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID)
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID, std140) uniform cbInpaintingPyramid_t
+ {
+ FfxUInt32 mips;
+ FfxUInt32 numWorkGroups;
+ FfxUInt32x2 workGroupOffset;
+ } cbInpaintingPyramid;
+
+ FfxUInt32 NumMips()
+ {
+ return cbInpaintingPyramid.mips;
+ }
+ FfxUInt32 NumWorkGroups()
+ {
+ return cbInpaintingPyramid.numWorkGroups;
+ }
+ FfxUInt32x2 WorkGroupOffset()
+ {
+ return cbInpaintingPyramid.workGroupOffset;
+ }
+
+#endif // defined(FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID)
+
+
+ ///////////////////////////////////////////////
+ // declare samplers
+///////////////////////////////////////////////
+
+
+layout (set = 0, binding = 1000) uniform sampler s_LinearClamp;
+
+ ///////////////////////////////////////////////
+ // declare SRVs and SRV accessors
+///////////////////////////////////////////////
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_PREVIOUS_INTERPOLATION_SOURCE
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_PREVIOUS_INTERPOLATION_SOURCE) uniform texture2D r_previous_interpolation_source;
+ // Load*: texelFetch of mip 0 at integer pixel iPxPos (rgb only). Sample*: textureLod at LOD 0.0 through s_LinearClamp at UV fUv (xyz only).
+ FfxFloat32x3 LoadPreviousBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return texelFetch(r_previous_interpolation_source, iPxPos, 0).rgb;
+ }
+ FfxFloat32x3 SamplePreviousBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv)
+ {
+ return textureLod(sampler2D(r_previous_interpolation_source, s_LinearClamp), fUv, 0.0).xyz;
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE) uniform texture2D r_current_interpolation_source;
+ // Load*: texelFetch of mip 0 at integer pixel iPxPos (rgb only). Sample*: textureLod at LOD 0.0 through s_LinearClamp at UV fUv (xyz only).
+ FfxFloat32x3 LoadCurrentBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return texelFetch(r_current_interpolation_source, iPxPos, 0).rgb;
+ }
+ FfxFloat32x3 SampleCurrentBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv)
+ {
+ return textureLod(sampler2D(r_current_interpolation_source, s_LinearClamp), fUv, 0.0).xyz;
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_MOTION_VECTORS
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors;
+ // Returns the xy channels of mip 0 of r_dilated_motion_vectors at integer pixel iPxPos.
+ FfxFloat32x2 LoadDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return texelFetch(r_dilated_motion_vectors, iPxPos, 0).xy;
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilated_depth;
+ // Returns the x channel of mip 0 of r_dilated_depth at integer pixel iPxPos.
+ FfxFloat32 LoadDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return texelFetch(r_dilated_depth, iPxPos, 0).x;
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME) uniform utexture2D r_reconstructed_depth_previous_frame;
+ // Fetches the uint x channel (mip 0) at iPxInput and passes it through ffxAsFloat (presumably a bit reinterpretation; defined outside this chunk).
+ FfxFloat32 LoadReconstructedDepthPreviousFrame(FFX_PARAMETER_IN FfxInt32x2 iPxInput)
+ {
+ return ffxAsFloat(texelFetch(r_reconstructed_depth_previous_frame, iPxInput, 0).x);
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME) uniform utexture2D r_reconstructed_depth_interpolated_frame;
+ // Fetches the uint x channel (mip 0) at iPxInput and passes it through ffxAsFloat (presumably a bit reinterpretation; defined outside this chunk).
+ FfxFloat32 LoadEstimatedInterpolationFrameDepth(FFX_PARAMETER_IN FfxInt32x2 iPxInput)
+ {
+ return ffxAsFloat(texelFetch(r_reconstructed_depth_interpolated_frame, iPxInput, 0).x);
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DISOCCLUSION_MASK
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_DISOCCLUSION_MASK) uniform texture2D r_disocclusion_mask;
+ // Load*: all four channels of mip 0 at integer pixel iPxPos. Sample*: all four channels at UV fUv through s_LinearClamp, LOD given as the integer literal 0.
+ FfxFloat32x4 LoadDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return texelFetch(r_disocclusion_mask, iPxPos, 0);
+ }
+ FfxFloat32x4 SampleDisocclusionMask(FFX_PARAMETER_IN FfxFloat32x2 fUv)
+ {
+ return textureLod(sampler2D(r_disocclusion_mask, s_LinearClamp), fUv, 0);
+ }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y)
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X) uniform utexture2D r_game_motion_vector_field_x;
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y) uniform utexture2D r_game_motion_vector_field_y;
+ // Returns the packed game motion vector field entry at iPxSample: .x from the X image, .y from the Y image (mip 0 of each).
+ FfxUInt32x2 LoadGameFieldMv(FFX_PARAMETER_IN FfxInt32x2 iPxSample)
+ {
+ return FfxUInt32x2(
+ texelFetch(r_game_motion_vector_field_x, iPxSample, 0).x,
+ texelFetch(r_game_motion_vector_field_y, iPxSample, 0).x
+ );
+ }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y)
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) uniform utexture2D r_optical_flow_motion_vector_field_x;
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y) uniform utexture2D r_optical_flow_motion_vector_field_y;
+ // Returns the packed optical flow motion vector field entry at iPxSample: .x from the X image, .y from the Y image (mip 0 of each).
+ FfxUInt32x2 LoadOpticalFlowFieldMv(FFX_PARAMETER_IN FfxInt32x2 iPxSample)
+ {
+ return FfxUInt32x2(
+ texelFetch(r_optical_flow_motion_vector_field_x, iPxSample, 0).x,
+ texelFetch(r_optical_flow_motion_vector_field_y, iPxSample, 0).x
+ );
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW) uniform itexture2D r_optical_flow;
+ // The accessor needs GetOpticalFlowScale(), which only exists when the frame interpolation constant buffer is bound, hence the inner guard.
+ #if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION)
+ FfxFloat32x2 LoadOpticalFlow(FFX_PARAMETER_IN FfxInt32x2 iPxPos) // integer xy texel (mip 0) multiplied by GetOpticalFlowScale()
+ {
+ return texelFetch(r_optical_flow, iPxPos, 0).xy * GetOpticalFlowScale();
+ }
+ #endif
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_UPSAMPLED
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_UPSAMPLED) uniform texture2D r_optical_flow_upsampled;
+ // Returns the xy channels of mip 0 of r_optical_flow_upsampled at integer pixel iPxPos; no scale is applied here.
+ FfxFloat32x2 LoadOpticalFlowUpsampled(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return texelFetch(r_optical_flow_upsampled, iPxPos, 0).xy;
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_CONFIDENCE
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_CONFIDENCE) uniform utexture2D r_optical_flow_confidence;
+ // NOTE(review): reads the .y channel of a uint texture and returns it as FfxFloat32 via implicit conversion - confirm the channel against the producer.
+ FfxFloat32 LoadOpticalFlowConfidence(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return texelFetch(r_optical_flow_confidence, iPxPos, 0).y;
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION) uniform utexture2D r_optical_flow_global_motion;
+ // Returns the uint x channel of mip 0 of r_optical_flow_global_motion at integer pixel iPxPos.
+ FfxUInt32 LoadOpticalFlowGlobalMotion(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return texelFetch(r_optical_flow_global_motion, iPxPos, 0).x;
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_SCENE_CHANGE_DETECTION
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_SCENE_CHANGE_DETECTION) uniform utexture2D r_optical_flow_scd;
+ // Returns the uint x channel of mip 0 of r_optical_flow_scd at integer pixel iPxPos.
+ FfxUInt32 LoadOpticalFlowSceneChangeDetection(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return texelFetch(r_optical_flow_scd, iPxPos, 0).x;
+ }
+ // True when any of the low 4 bits of texel (SCD_OUTPUT_HISTORY_BITS_SLOT, 0) is set. The #define below is never #undef'd, so it stays visible to later code.
+ FfxBoolean HasSceneChanged()
+ {
+ #define SCD_OUTPUT_HISTORY_BITS_SLOT 1
+ //if (FrameIndex() <= 5) // threshold according to original OpenCL code
+ //{
+ // return 1.0;
+ //}
+ //else
+ {
+ // Report that the scene is changed if the change was detected in any of the
+ // 4 previous frames (0xfu - covers 4 history bits).
+ return ((texelFetch(r_optical_flow_scd, FfxInt32x2(SCD_OUTPUT_HISTORY_BITS_SLOT, 0), 0).x) & 0xfu) != 0;
+ }
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_DEBUG
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_DEBUG) uniform texture2D r_optical_flow_debug;
+ // Returns all four channels of mip 0 of r_optical_flow_debug at integer pixel iPxPos.
+ FfxFloat32x4 LoadOpticalFlowDebug(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return texelFetch(r_optical_flow_debug, iPxPos, 0);
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT) uniform texture2D r_output;
+ // Returns all four channels of mip 0 of r_output at integer pixel iPxInput.
+ FfxFloat32x4 LoadFrameInterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxInput)
+ {
+ return texelFetch(r_output, iPxInput, 0);
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID) uniform texture2D r_inpainting_pyramid;
+ // Fetches all four channels from mip mipLevel; the unsigned pixel coordinate is converted to FfxInt32x2 for texelFetch.
+ FfxFloat32x4 LoadInpaintingPyramid(FFX_PARAMETER_IN FfxInt32 mipLevel, FFX_PARAMETER_IN FfxUInt32x2 iPxInput)
+ {
+ return texelFetch(r_inpainting_pyramid, FfxInt32x2(iPxInput), mipLevel);
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_PRESENT_BACKBUFFER
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_PRESENT_BACKBUFFER) uniform texture2D r_present_backbuffer;
+ // Load*: all four channels of mip 0 at integer pixel iPxInput. Sample*: all four channels at UV fUv through s_LinearClamp at LOD 0.0.
+ FfxFloat32x4 LoadPresentBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxInput)
+ {
+ return texelFetch(r_present_backbuffer, iPxInput, 0);
+ }
+ FfxFloat32x4 SamplePresentBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv)
+ {
+ return textureLod(sampler2D(r_present_backbuffer, s_LinearClamp), fUv, 0.0);
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_COUNTERS
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_COUNTERS) readonly buffer FrameInterpolationCounters_t
+ {
+ FfxUInt32 data[];
+ } r_counters;
+ // Returns r_counters.data[iPxPos]; the index is used as given, without a range check.
+ FfxUInt32 LoadCounter(FFX_PARAMETER_IN FfxInt32 iPxPos)
+ {
+ return r_counters.data[iPxPos];
+ }
+ // Returns the counter stored at index COUNTER_FRAME_INDEX_SINCE_LAST_RESET.
+ FfxUInt32 FrameIndexSinceLastReset()
+ {
+ return LoadCounter(COUNTER_FRAME_INDEX_SINCE_LAST_RESET);
+ }
+#endif
+
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_DEPTH)
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth;
+ // Returns the x channel of mip 0 of r_input_depth at integer pixel iPxPos.
+ FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos)
+ {
+ return texelFetch(r_input_depth, iPxPos, 0).x;
+ }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_MOTION_VECTORS)
+ layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors;
+ // Fetches the xy motion vector at the given pixel (mip 0) and multiplies it by MotionVectorScale().
+ FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos)
+ {
+ FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy;
+
+ FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
+ // NOTE(review): MotionVectorJitterCancellation() is not defined in the visible part of these headers - confirm it exists before enabling the option below.
+ #if FFX_FRAMEINTERPOLATION_OPTION_JITTERED_MOTION_VECTORS
+ fUvMotionVector -= MotionVectorJitterCancellation();
+ #endif
+
+ return fUvMotionVector;
+ }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD)
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD) uniform texture2D r_input_distortion_field;
+ FfxFloat32x2 SampleDistortionField(FFX_PARAMETER_IN FfxFloat32x2 fUv) // xy channels at UV fUv through s_LinearClamp at LOD 0.0
+ {
+ return textureLod(sampler2D(r_input_distortion_field, s_LinearClamp), fUv, 0.0).xy;
+ }
+#endif
+
+///////////////////////////////////////////////
+// declare UAVs and UAV accessors
+///////////////////////////////////////////////
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT /* app controlled format */) uniform image2D rw_output;
+ // Reads the RGBA texel of rw_output at iPxPos. NOTE(review): the image has no format qualifier, so this load presumably relies on format-less image reads - confirm.
+ FfxFloat32x4 RWLoadFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return imageLoad(rw_output, iPxPos);
+ }
+ // Writes val to rw_output at iPxPos.
+ void StoreFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 val)
+ {
+ imageStore(rw_output, iPxPos, val);
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_MOTION_VECTORS
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) uniform image2D rw_dilated_motion_vectors;
+ // Reads the xy channels of the rg16f image at iPxPos.
+ FfxFloat32x2 RWLoadDilatedMotionVectors(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return imageLoad(rw_dilated_motion_vectors, iPxPos).xy;
+ }
+ // Writes val into xy at iPxPos; the remaining components of the stored vec4 are 0.0.
+ void StoreDilatedMotionVectors(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 val)
+ {
+ imageStore(rw_dilated_motion_vectors, iPxPos, FfxFloat32x4(val, 0.0, 0.0));
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_DEPTH
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_DEPTH, r32f) uniform image2D rw_dilated_depth;
+ // Reads the x channel of the r32f image at iPxPos.
+ FfxFloat32 RWLoadDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return imageLoad(rw_dilated_depth, iPxPos).x;
+ }
+ // Writes val into x at iPxPos; the remaining components of the stored vec4 are 0.0.
+ void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 val)
+ {
+ imageStore(rw_dilated_depth, iPxPos, FfxFloat32x4(val, 0.0, 0.0, 0.0));
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME, r32ui) uniform uimage2D rw_reconstructed_depth_previous_frame;
+ // Reads the r32ui texel at iPxPos and passes its x channel through ffxAsFloat.
+ FfxFloat32 RWLoadReconstructedDepthPreviousFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return ffxAsFloat(imageLoad(rw_reconstructed_depth_previous_frame, iPxPos).x);
+ }
+ // Converts fDepth with ffxAsUInt32 and merges it into the texel at iPxSample with an image atomic.
+ void UpdateReconstructedDepthPreviousFrame(FfxInt32x2 iPxSample, FfxFloat32 fDepth)
+ {
+ FfxUInt32 uDepth = ffxAsUInt32(fDepth);
+ // The atomic is selected at compile time by FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH.
+#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH
+ imageAtomicMax(rw_reconstructed_depth_previous_frame, iPxSample, uDepth);
+#else
+ imageAtomicMin(rw_reconstructed_depth_previous_frame, iPxSample, uDepth); // min for standard, max for inverted depth
+#endif
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME, r32ui) uniform uimage2D rw_reconstructed_depth_interpolated_frame;
+ // Reads the r32ui texel at iPxPos and passes its x channel through ffxAsFloat.
+ FfxFloat32 RWLoadReconstructedDepthInterpolatedFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return ffxAsFloat(imageLoad(rw_reconstructed_depth_interpolated_frame, iPxPos).x);
+ }
+ // Plain (non-atomic) overwrite: stores ffxAsUInt32(value) in x, with 0 in the other components.
+ void StoreReconstructedDepthInterpolatedFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 value)
+ {
+ FfxUInt32 uDepth = ffxAsUInt32(value);
+ imageStore(rw_reconstructed_depth_interpolated_frame, iPxPos, FfxUInt32x4(uDepth, 0, 0, 0));
+ }
+ // Atomic variant: merges ffxAsUInt32(fDepth) into the texel at iPxSample.
+ void UpdateReconstructedDepthInterpolatedFrame(FfxInt32x2 iPxSample, FfxFloat32 fDepth)
+ {
+ FfxUInt32 uDepth = ffxAsUInt32(fDepth);
+ // The atomic is selected at compile time by FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH.
+#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH
+ imageAtomicMax(rw_reconstructed_depth_interpolated_frame, iPxSample, uDepth);
+#else
+ imageAtomicMin(rw_reconstructed_depth_interpolated_frame, iPxSample, uDepth); // min for standard, max for inverted depth
+#endif
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DISOCCLUSION_MASK
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_DISOCCLUSION_MASK, rg8) uniform image2D rw_disocclusion_mask;
+ // Reads the xy channels of the rg8 image at iPxPos.
+ FfxFloat32x2 RWLoadDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return imageLoad(rw_disocclusion_mask, iPxPos).xy;
+ }
+ // Writes val into xy at iPxPos; the remaining components of the stored vec4 are 0.0.
+ void StoreDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 val)
+ {
+ imageStore(rw_disocclusion_mask, iPxPos, FfxFloat32x4(val, 0.0, 0.0));
+ }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_X) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_Y)
+
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_X, r32ui) uniform uimage2D rw_game_motion_vector_field_x;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_Y, r32ui) uniform uimage2D rw_game_motion_vector_field_y;
+ // Reads the x channel of the X field image at iPxPos.
+ FfxUInt32 RWLoadGameMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return imageLoad(rw_game_motion_vector_field_x, iPxPos).x;
+ }
+ // Plain overwrite of the X field texel at iPxPos with val (other components 0).
+ void StoreGameMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val)
+ {
+ imageStore(rw_game_motion_vector_field_x, iPxPos, FfxUInt32x4(val, 0, 0, 0));
+ }
+ // Reads the x channel of the Y field image at iPxPos.
+ FfxUInt32 RWLoadGameMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return imageLoad(rw_game_motion_vector_field_y, iPxPos).x;
+ }
+ // Plain overwrite of the Y field texel at iPxPos with val (other components 0).
+ void StoreGameMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val)
+ {
+ imageStore(rw_game_motion_vector_field_y, iPxPos, FfxUInt32x4(val, 0, 0, 0));
+ }
+ // Atomic-max merge of packedVector.x / .y into the X / Y field texels at iPxPos; the two atomics are independent operations.
+ void UpdateGameMotionVectorField(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector)
+ {
+ imageAtomicMax(rw_game_motion_vector_field_x, iPxPos, packedVector.x);
+ imageAtomicMax(rw_game_motion_vector_field_y, iPxPos, packedVector.y);
+ }
+ // Same merge as UpdateGameMotionVectorField, but returns ffxMax of the two values the atomics returned.
+ FfxUInt32 UpdateGameMotionVectorFieldEx(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector)
+ {
+ FfxUInt32 uPreviousValueX = imageAtomicMax(rw_game_motion_vector_field_x, iPxPos, packedVector.x);
+ FfxUInt32 uPreviousValueY = imageAtomicMax(rw_game_motion_vector_field_y, iPxPos, packedVector.y);
+
+ const FfxUInt32 uExistingVectorFieldEntry = ffxMax(uPreviousValueX, uPreviousValueY);
+
+ return uExistingVectorFieldEntry;
+ }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y)
+
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X, r32ui) uniform uimage2D rw_optical_flow_motion_vector_field_x;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y, r32ui) uniform uimage2D rw_optical_flow_motion_vector_field_y;
+ // RWLoad*: x channel of the texel at iPxPos. Store*: plain overwrite with val (other components 0). Update*: atomic-max merge of both fields.
+ FfxUInt32 RWLoadOpticalflowMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return imageLoad(rw_optical_flow_motion_vector_field_x, iPxPos).x;
+ }
+ void StoreOpticalflowMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val)
+ {
+ imageStore(rw_optical_flow_motion_vector_field_x, iPxPos, FfxUInt32x4(val, 0, 0, 0));
+ }
+ FfxUInt32 RWLoadOpticalflowMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return imageLoad(rw_optical_flow_motion_vector_field_y, iPxPos).x;
+ }
+ void StoreOpticalflowMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val)
+ {
+ imageStore(rw_optical_flow_motion_vector_field_y, iPxPos, FfxUInt32x4(val, 0, 0, 0));
+ }
+ void UpdateOpticalflowMotionVectorField(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector)
+ {
+ imageAtomicMax(rw_optical_flow_motion_vector_field_x, iPxPos, packedVector.x);
+ imageAtomicMax(rw_optical_flow_motion_vector_field_y, iPxPos, packedVector.y);
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_COUNTERS
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_COUNTERS) coherent buffer FrameInterpolationRWCounters_t
+ {
+ FfxUInt32 data[];
+ } rw_counters;
+ // Returns rw_counters.data[iPxPos]; the index is used as given, without a range check.
+ FfxUInt32 RWLoadCounter(FFX_PARAMETER_IN FfxInt32 iPxPos)
+ {
+ return rw_counters.data[iPxPos];
+ }
+ // StoreCounter overwrites the slot non-atomically; AtomicIncreaseCounter adds 1 with atomicAdd and writes the value atomicAdd returned to oldVal.
+ void StoreCounter(FFX_PARAMETER_IN FfxInt32 iPxPos, FFX_PARAMETER_IN FfxUInt32 counter)
+ {
+ rw_counters.data[iPxPos] = counter;
+ }
+ void AtomicIncreaseCounter(FFX_PARAMETER_IN FfxInt32 iPxPos, FFX_PARAMETER_OUT FfxUInt32 oldVal)
+ {
+ oldVal = atomicAdd(rw_counters.data[iPxPos], 1);
+ }
+#endif
+
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_0) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_1) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_2) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_3) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_4) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_5) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_6) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_7) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_8) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_9) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_10) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_11) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_12)
+ // One rgba16f storage image per pyramid level 0..12; only level 5 carries the coherent qualifier.
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_0, rgba16f) uniform image2D rw_inpainting_pyramid0;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_1, rgba16f) uniform image2D rw_inpainting_pyramid1;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_2, rgba16f) uniform image2D rw_inpainting_pyramid2;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_3, rgba16f) uniform image2D rw_inpainting_pyramid3;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_4, rgba16f) uniform image2D rw_inpainting_pyramid4;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_5, rgba16f) coherent uniform image2D rw_inpainting_pyramid5;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_6, rgba16f) uniform image2D rw_inpainting_pyramid6;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_7, rgba16f) uniform image2D rw_inpainting_pyramid7;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_8, rgba16f) uniform image2D rw_inpainting_pyramid8;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_9, rgba16f) uniform image2D rw_inpainting_pyramid9;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_10, rgba16f) uniform image2D rw_inpainting_pyramid10;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_11, rgba16f) uniform image2D rw_inpainting_pyramid11;
+ layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_12, rgba16f) uniform image2D rw_inpainting_pyramid12;
+
+ // Reads the texel at iPxPos from pyramid level `index`; any index above 12 yields a zero vector.
+ FfxFloat32x4 RWLoadInpaintingPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 index)
+ {
+ // Each level is a separately named image, so the level is picked by an explicit case.
+ switch (index)
+ {
+ case 0u: return imageLoad(rw_inpainting_pyramid0, iPxPos);
+ case 1u: return imageLoad(rw_inpainting_pyramid1, iPxPos);
+ case 2u: return imageLoad(rw_inpainting_pyramid2, iPxPos);
+ case 3u: return imageLoad(rw_inpainting_pyramid3, iPxPos);
+ case 4u: return imageLoad(rw_inpainting_pyramid4, iPxPos);
+ case 5u: return imageLoad(rw_inpainting_pyramid5, iPxPos);
+ case 6u: return imageLoad(rw_inpainting_pyramid6, iPxPos);
+ case 7u: return imageLoad(rw_inpainting_pyramid7, iPxPos);
+ case 8u: return imageLoad(rw_inpainting_pyramid8, iPxPos);
+ case 9u: return imageLoad(rw_inpainting_pyramid9, iPxPos);
+ case 10u: return imageLoad(rw_inpainting_pyramid10, iPxPos);
+ case 11u: return imageLoad(rw_inpainting_pyramid11, iPxPos);
+ case 12u: return imageLoad(rw_inpainting_pyramid12, iPxPos);
+ default:
+ break;
+ }
+
+ return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+ }
+
+ void StoreInpaintingPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 outValue, FFX_PARAMETER_IN FfxUInt32 index)
+ {
+ // Writes outValue at iPxPos into pyramid level `index`.
+ // Exactly one level is written for indices 0..12.
+ switch (index)
+ {
+ case 0u: imageStore(rw_inpainting_pyramid0, iPxPos, outValue); break;
+ case 1u: imageStore(rw_inpainting_pyramid1, iPxPos, outValue); break;
+ case 2u: imageStore(rw_inpainting_pyramid2, iPxPos, outValue); break;
+ case 3u: imageStore(rw_inpainting_pyramid3, iPxPos, outValue); break;
+ case 4u: imageStore(rw_inpainting_pyramid4, iPxPos, outValue); break;
+ case 5u: imageStore(rw_inpainting_pyramid5, iPxPos, outValue); break;
+ case 6u: imageStore(rw_inpainting_pyramid6, iPxPos, outValue); break;
+ case 7u: imageStore(rw_inpainting_pyramid7, iPxPos, outValue); break;
+ case 8u: imageStore(rw_inpainting_pyramid8, iPxPos, outValue); break;
+ case 9u: imageStore(rw_inpainting_pyramid9, iPxPos, outValue); break;
+ case 10u: imageStore(rw_inpainting_pyramid10, iPxPos, outValue); break;
+ case 11u: imageStore(rw_inpainting_pyramid11, iPxPos, outValue); break;
+ case 12u: imageStore(rw_inpainting_pyramid12, iPxPos, outValue); break;
+ default:
+ // Indices above 12 store nothing.
+ break;
+ }
+ }
+#endif
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_hlsl.h
new file mode 100644
index 000000000000..a58743d2f60c
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_hlsl.h
@@ -0,0 +1,814 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_frameinterpolation_resources.h"
+
+#if defined(FFX_GPU)
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic push
+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
+#endif //__hlsl_dx_compiler
+#include "ffx_core.h"
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic pop
+#endif //__hlsl_dx_compiler
+#endif // #if defined(FFX_GPU)
+
+#if defined(FFX_GPU)
+// Counter slot indices. NOTE(review): COUNTER_SPD has no consumer in the visible part of this header - confirm where it is used.
+#define COUNTER_SPD 0
+#define COUNTER_FRAME_INDEX_SINCE_LAST_RESET 1
+
+ ///////////////////////////////////////////////
+ // declare CBs and CB accessors
+///////////////////////////////////////////////
+#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION)
+ cbuffer cbFI : FFX_DECLARE_CB(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION)
+ { // Members are declared in groups that each total four 32-bit scalars.
+ FfxInt32x2 renderSize;
+ FfxInt32x2 displaySize;
+
+ FfxFloat32x2 displaySizeRcp;
+ FfxFloat32 cameraNear;
+ FfxFloat32 cameraFar;
+
+ FfxInt32x2 upscalerTargetSize;
+ FfxInt32 Mode;
+ FfxInt32 reset;
+
+ FfxFloat32x4 fDeviceToViewDepth;
+
+ FfxFloat32 deltaTime;
+ FfxInt32 HUDLessAttachedFactor;
+ FfxInt32x2 distortionFieldSize;
+
+ FfxFloat32x2 opticalFlowScale;
+ FfxInt32 opticalFlowBlockSize;
+ FfxUInt32 dispatchFlags;
+
+ FfxInt32x2 maxRenderSize;
+ FfxInt32 opticalFlowHalfResMode;
+ FfxInt32 NumInstances;
+
+ FfxInt32x2 interpolationRectBase;
+ FfxInt32x2 interpolationRectSize;
+
+ FfxFloat32x3 debugBarColor;
+ FfxUInt32 backBufferTransferFunction;
+
+ FfxFloat32x2 minMaxLuminance;
+ FfxFloat32 fTanHalfFOV;
+ FfxInt32 _pad1; // not read by any accessor in this block
+
+ FfxFloat32x2 fJitter;
+ FfxFloat32x2 fMotionVectorScale;
+ }
+ // Returns fJitter unchanged.
+ const FfxFloat32x2 Jitter()
+ {
+ return fJitter;
+ }
+ // Returns fMotionVectorScale unchanged.
+ const FfxFloat32x2 MotionVectorScale()
+ {
+ return fMotionVectorScale;
+ }
+ // Returns interpolationRectBase unchanged.
+ const FfxInt32x2 InterpolationRectBase()
+ {
+ return interpolationRectBase;
+ }
+ // Returns interpolationRectSize unchanged.
+ const FfxInt32x2 InterpolationRectSize()
+ {
+ return interpolationRectSize;
+ }
+ // Returns renderSize unchanged.
+ const FfxInt32x2 RenderSize()
+ {
+ return renderSize;
+ }
+ // Returns displaySize unchanged.
+ const FfxInt32x2 DisplaySize()
+ {
+ return displaySize;
+ }
+ // True only when the reset constant equals exactly 1.
+ const FfxBoolean Reset()
+ {
+ return reset == 1;
+ }
+ // Returns fDeviceToViewDepth unchanged.
+ FfxFloat32x4 DeviceToViewSpaceTransformFactors()
+ {
+ return fDeviceToViewDepth;
+ }
+ // (1 / opticalFlowScale) divided per component by opticalFlowBlockSize; the float result is implicitly converted to FfxInt32x2.
+ FfxInt32x2 GetOpticalFlowSize()
+ {
+ FfxInt32x2 iOpticalFlowSize = (1.0f / opticalFlowScale) / FfxFloat32x2(opticalFlowBlockSize.xx);
+
+ return iOpticalFlowSize;
+ }
+ // Currently GetOpticalFlowSize() multiplied by 1, i.e. the same value.
+ FfxInt32x2 GetOpticalFlowSize2()
+ {
+ return GetOpticalFlowSize() * 1;
+ }
+ // Returns opticalFlowScale unchanged.
+ FfxFloat32x2 GetOpticalFlowScale()
+ {
+ return opticalFlowScale;
+ }
+ // Returns opticalFlowBlockSize unchanged.
+ FfxInt32 GetOpticalFlowBlockSize()
+ {
+ return opticalFlowBlockSize;
+ }
+ // Returns HUDLessAttachedFactor unchanged.
+ FfxInt32 GetHUDLessAttachedFactor()
+ {
+ return HUDLessAttachedFactor;
+ }
+ // Returns distortionFieldSize unchanged.
+ FfxInt32x2 GetDistortionFieldSize()
+ {
+ return distortionFieldSize;
+ }
+ // Returns dispatchFlags unchanged.
+ FfxUInt32 GetDispatchFlags()
+ {
+ return dispatchFlags;
+ }
+ // Returns maxRenderSize unchanged.
+ FfxInt32x2 GetMaxRenderSize()
+ {
+ return maxRenderSize;
+ }
+ // Returns opticalFlowHalfResMode unchanged.
+ FfxInt32 GetOpticalFlowHalfResMode()
+ {
+ return opticalFlowHalfResMode;
+ }
+ // Returns debugBarColor unchanged.
+ FfxFloat32x3 GetDebugBarColor()
+ {
+ return debugBarColor;
+ }
+ // Returns fTanHalfFOV unchanged.
+ FfxFloat32 TanHalfFoV()
+ {
+ return fTanHalfFOV;
+ }
+ // Returns backBufferTransferFunction unchanged.
+ FfxUInt32 BackBufferTransferFunction()
+ {
+ return backBufferTransferFunction;
+ }
+ // Returns component 0 of minMaxLuminance.
+ FfxFloat32 MinLuminance()
+ {
+ return minMaxLuminance[0];
+ }
+ // Returns component 1 of minMaxLuminance.
+ FfxFloat32 MaxLuminance()
+ {
+ return minMaxLuminance[1];
+ }
+
+#endif // #if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION)
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID)
+ cbuffer cbInpaintingPyramid : FFX_DECLARE_CB(FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID)
+ {
+ FfxUInt32 mips; // read by NumMips()
+ FfxUInt32 numWorkGroups; // read by NumWorkGroups()
+ FfxUInt32x2 workGroupOffset; // read by WorkGroupOffset()
+ }
+ // Accessors for the cbInpaintingPyramid constants; each returns the matching member unchanged.
+ FfxUInt32 NumMips()
+ {
+ return mips;
+ }
+ FfxUInt32 NumWorkGroups()
+ {
+ return numWorkGroups;
+ }
+ FfxUInt32x2 WorkGroupOffset()
+ {
+ return workGroupOffset;
+ }
+#endif // #if defined(FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID)
+// Two-step stringification so FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT is macro-expanded before it is turned into a string literal.
+#define FFX_FRAMEINTERPOLATION_ROOTSIG_STRINGIFY(p) FFX_FRAMEINTERPOLATION_ROOTSIG_STR(p)
+#define FFX_FRAMEINTERPOLATION_ROOTSIG_STR(p) #p
+#define FFX_FRAMEINTERPOLATION_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FRAMEINTERPOLATION_ROOTSIG_STRINGIFY(FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "DescriptorTable(SRV(t0, numDescriptors = " FFX_FRAMEINTERPOLATION_ROOTSIG_STRINGIFY(FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "CBV(b0), " \
+ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+// Same layout as FFX_FRAMEINTERPOLATION_ROOTSIG plus a second root CBV at b1.
+#define FFX_FRAMEINTERPOLATION_INPAINTING_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FRAMEINTERPOLATION_ROOTSIG_STRINGIFY(FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "DescriptorTable(SRV(t0, numDescriptors = " FFX_FRAMEINTERPOLATION_ROOTSIG_STRINGIFY(FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "CBV(b0), " \
+ "CBV(b1), " \
+ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+// The *_EMBED_*_CONTENT macros expand to the attributes above only when FFX_FRAMEINTERPOLATION_EMBED_ROOTSIG is defined, otherwise to nothing.
+#if defined(FFX_FRAMEINTERPOLATION_EMBED_ROOTSIG)
+#define FFX_FRAMEINTERPOLATION_EMBED_ROOTSIG_CONTENT FFX_FRAMEINTERPOLATION_ROOTSIG
+#define FFX_FRAMEINTERPOLATION_EMBED_INPAINTING_ROOTSIG_CONTENT FFX_FRAMEINTERPOLATION_INPAINTING_ROOTSIG
+#else
+#define FFX_FRAMEINTERPOLATION_EMBED_ROOTSIG_CONTENT
+#define FFX_FRAMEINTERPOLATION_EMBED_INPAINTING_ROOTSIG_CONTENT
+#endif // #if FFX_FRAMEINTERPOLATION_EMBED_ROOTSIG
+
+///////////////////////////////////////////////
+// declare samplers
+///////////////////////////////////////////////
+// Bound to register s0, the slot of the StaticSampler (linear filter, clamp addressing) declared by the root signature macros in this file.
+SamplerState s_LinearClamp : register(s0);
+
+///////////////////////////////////////////////
+// declare SRVs and SRV accessors
+///////////////////////////////////////////////
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_PREVIOUS_INTERPOLATION_SOURCE
+ Texture2D r_previous_interpolation_source : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_PREVIOUS_INTERPOLATION_SOURCE);
+ // Load*: direct texel read at integer pixel iPxPos (rgb only). Sample*: SampleLevel at mip 0 through s_LinearClamp at UV fUv (xyz only).
+ FfxFloat32x3 LoadPreviousBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return r_previous_interpolation_source[iPxPos].rgb;
+ }
+ FfxFloat32x3 SamplePreviousBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv)
+ {
+ return r_previous_interpolation_source.SampleLevel(s_LinearClamp, fUv, 0).xyz;
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE
+ Texture2D r_current_interpolation_source : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE);
+ // Load*: direct texel read at integer pixel iPxPos (rgb only). Sample*: SampleLevel at mip 0 through s_LinearClamp at UV fUv (xyz only).
+ FfxFloat32x3 LoadCurrentBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return r_current_interpolation_source[iPxPos].rgb;
+ }
+ FfxFloat32x3 SampleCurrentBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv)
+ {
+ return r_current_interpolation_source.SampleLevel(s_LinearClamp, fUv, 0).xyz;
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_MOTION_VECTORS
+ Texture2D r_dilated_motion_vectors : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_MOTION_VECTORS);
+ // Returns the xy channels of r_dilated_motion_vectors at integer pixel iPxPos.
+ FfxFloat32x2 LoadDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return r_dilated_motion_vectors[iPxPos].xy;
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH
+ Texture2D r_dilated_depth : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH);
+ // Returns the x channel of r_dilated_depth at integer pixel iPxPos.
+ FfxFloat32 LoadDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return r_dilated_depth[iPxPos].x;
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME
+ Texture2D<FfxUInt32> r_reconstructed_depth_previous_frame : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME);
+ // The resource is typed as uint (the GLSL callbacks bind it as utexture2D) so that asfloat() reinterprets the stored bits instead of receiving a float view.
+ FfxFloat32 LoadReconstructedDepthPreviousFrame(FFX_PARAMETER_IN FfxInt32x2 iPxInput)
+ {
+ return asfloat(r_reconstructed_depth_previous_frame[iPxInput]);
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME
+ Texture2D<FfxUInt32> r_reconstructed_depth_interpolated_frame : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME);
+ // The resource is typed as uint (the GLSL callbacks bind it as utexture2D) so that asfloat() reinterprets the stored bits instead of receiving a float view.
+ FfxFloat32 LoadEstimatedInterpolationFrameDepth(FFX_PARAMETER_IN FfxInt32x2 iPxInput)
+ {
+ return asfloat(r_reconstructed_depth_interpolated_frame[iPxInput]);
+ }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DISOCCLUSION_MASK
+ Texture2D r_disocclusion_mask : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_DISOCCLUSION_MASK);
+ // Load*: all four channels at integer pixel iPxPos. Sample*: all four channels at UV fUv through s_LinearClamp at mip 0.
+ FfxFloat32x4 LoadDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return r_disocclusion_mask[iPxPos];
+ }
+ FfxFloat32x4 SampleDisocclusionMask(FFX_PARAMETER_IN FfxFloat32x2 fUv)
+ {
+ return r_disocclusion_mask.SampleLevel(s_LinearClamp, fUv, 0);
+ }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y)
+    // Each texel is a packed 32-bit vector field entry, so the views carry an explicit
+    // unsigned element type; an untyped Texture2D would read the bit-fields as float4.
+    Texture2D<FfxUInt32> r_game_motion_vector_field_x : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X);
+    Texture2D<FfxUInt32> r_game_motion_vector_field_y : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y);
+
+    // Returns the packed X and Y entries of the game motion vector field at iPxSample.
+    FfxUInt32x2 LoadGameFieldMv(FFX_PARAMETER_IN FfxInt32x2 iPxSample)
+    {
+        const FfxUInt32 packedX = r_game_motion_vector_field_x[iPxSample];
+        const FfxUInt32 packedY = r_game_motion_vector_field_y[iPxSample];
+
+        return FfxUInt32x2(packedX, packedY);
+    }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y)
+    // Each texel is a packed 32-bit vector field entry, so the views carry an explicit
+    // unsigned element type; an untyped Texture2D would read the bit-fields as float4.
+    Texture2D<FfxUInt32> r_optical_flow_motion_vector_field_x : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X);
+    Texture2D<FfxUInt32> r_optical_flow_motion_vector_field_y : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y);
+
+    // Returns the packed X and Y entries of the optical flow motion vector field at iPxSample.
+    FfxUInt32x2 LoadOpticalFlowFieldMv(FFX_PARAMETER_IN FfxInt32x2 iPxSample)
+    {
+        const FfxUInt32 packedX = r_optical_flow_motion_vector_field_x[iPxSample];
+        const FfxUInt32 packedY = r_optical_flow_motion_vector_field_y[iPxSample];
+
+        return FfxUInt32x2(packedX, packedY);
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW
+ // Read-only view of the optical flow result; only declared when its SRV slot is defined.
+ // NOTE(review): no element type is given here, so the view defaults to float4 - confirm
+ // against the format the optical flow pass actually writes.
+ Texture2D r_optical_flow : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW);
+
+ // The accessor needs GetOpticalFlowScale(), so it is only compiled when the frame
+ // interpolation constant buffer is bound as well.
+ #if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION)
+ // Returns the optical flow texel at iPxPos multiplied by GetOpticalFlowScale().
+ FfxFloat32x2 LoadOpticalFlow(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+ {
+ return r_optical_flow[iPxPos] * GetOpticalFlowScale();
+ }
+ #endif
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_UPSAMPLED
+    // Read-only view of the upsampled optical flow; only declared when its SRV slot is defined.
+    Texture2D r_optical_flow_upsampled : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_UPSAMPLED);
+
+    // Returns the texel at iPxPos converted to a two-component float vector.
+    FfxFloat32x2 LoadOpticalFlowUpsampled(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        const FfxFloat32x2 fFlow = r_optical_flow_upsampled[iPxPos];
+        return fFlow;
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_CONFIDENCE
+    // Read-only view of the optical flow confidence; only declared when its SRV slot is defined.
+    Texture2D r_optical_flow_confidence : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_CONFIDENCE);
+
+    // Returns the second (y) channel of the texel at iPxPos.
+    FfxFloat32 LoadOpticalFlowConfidence(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        const FfxFloat32 fConfidence = r_optical_flow_confidence[iPxPos].y;
+        return fConfidence;
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION
+    // The accessor hands out raw unsigned values, so the view carries an explicit unsigned
+    // element type; an untyped Texture2D would read the data through a float4 view and
+    // numerically convert it instead of returning the stored integer.
+    Texture2D<FfxUInt32> r_optical_flow_global_motion : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION);
+
+    // Returns the unsigned value stored at iPxPos.
+    FfxUInt32 LoadOpticalFlowGlobalMotion(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return r_optical_flow_global_motion[iPxPos];
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_SCENE_CHANGE_DETECTION
+    // HasSceneChanged() applies a bitwise AND to the texel, which is only defined for
+    // integer element types, so the view carries an explicit unsigned element type.
+    Texture2D<FfxUInt32> r_optical_flow_scd : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_SCENE_CHANGE_DETECTION);
+
+    // Returns the raw scene change detection value stored at iPxPos.
+    FfxUInt32 LoadOpticalFlowSceneChangeDetection(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return r_optical_flow_scd[iPxPos];
+    }
+
+    // Reports a scene change when any of the four history bits stored in texel
+    // (SCD_OUTPUT_HISTORY_BITS_SLOT, 0) is set, i.e. a change was detected in any of
+    // the 4 previous frames (0xfu covers 4 history bits).
+    FfxBoolean HasSceneChanged()
+    {
+        #define SCD_OUTPUT_HISTORY_BITS_SLOT 1
+        // Disabled upstream: unconditional "changed" result while FrameIndex() <= 5
+        // (threshold according to original OpenCL code).
+        //if (FrameIndex() <= 5)
+        //{
+        //    return 1.0;
+        //}
+        const FfxUInt32 uHistoryBits = r_optical_flow_scd[FfxInt32x2(SCD_OUTPUT_HISTORY_BITS_SLOT, 0)] & 0xfu;
+        return uHistoryBits != 0;
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_DEBUG
+    // Read-only view of the optical flow debug output; only declared when its SRV slot is defined.
+    Texture2D r_optical_flow_debug : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_DEBUG);
+
+    // Returns the unfiltered debug texel at iPxPos.
+    FfxFloat32x4 LoadOpticalFlowDebug(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        const FfxFloat32x4 fDebugValue = r_optical_flow_debug[iPxPos];
+        return fDebugValue;
+    }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_MASK) && defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT)
+    // Split layout: color and inpainting mask live in separate resources. The element
+    // types are spelled out so the four-component constructor below receives exactly
+    // 3 + 1 components (two untyped float4 views would supply 8).
+    Texture2D<FfxFloat32x3> r_output : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT);
+    Texture2D<FfxFloat32> r_inpainting_mask : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_MASK);
+
+    // Returns the output color in xyz and the inpainting mask value in w.
+    FfxFloat32x4 LoadFrameInterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxInput)
+    {
+        return FfxFloat32x4(r_output[iPxInput], r_inpainting_mask[iPxInput]);
+    }
+#elif defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT)
+    // Combined layout: all four channels come from the output texture alone.
+    Texture2D<FfxFloat32x4> r_output : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT);
+
+    // Returns the four-channel output texel at iPxInput.
+    FfxFloat32x4 LoadFrameInterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxInput)
+    {
+        return r_output[iPxInput];
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID
+    // Read-only view of the inpainting pyramid; only declared when its SRV slot is defined.
+    Texture2D r_inpainting_pyramid : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID);
+
+    // Returns the texel at iPxInput from the requested mip level of the pyramid.
+    FfxFloat32x4 LoadInpaintingPyramid(FFX_PARAMETER_IN FfxInt32 mipLevel, FFX_PARAMETER_IN FfxUInt32x2 iPxInput)
+    {
+        const FfxFloat32x4 fTexel = r_inpainting_pyramid.mips[mipLevel][iPxInput];
+        return fTexel;
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_PRESENT_BACKBUFFER
+    // Read-only view of the present backbuffer; only declared when its SRV slot is defined.
+    Texture2D r_present_backbuffer : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_PRESENT_BACKBUFFER);
+
+    // Returns the unfiltered backbuffer texel at iPxInput.
+    FfxFloat32x4 LoadPresentBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxInput)
+    {
+        const FfxFloat32x4 fTexel = r_present_backbuffer[iPxInput];
+        return fTexel;
+    }
+
+    // Samples the backbuffer at fUv on mip level 0 through the s_LinearClamp sampler.
+    FfxFloat32x4 SamplePresentBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv)
+    {
+        const FfxFloat32x4 fSample = r_present_backbuffer.SampleLevel(s_LinearClamp, fUv, 0);
+        return fSample;
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_COUNTERS
+    // StructuredBuffer has no default element type; the counters are read as FfxUInt32.
+    StructuredBuffer<FfxUInt32> r_counters : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_COUNTERS);
+
+    // Returns the counter stored at element index iPxPos.
+    FfxUInt32 LoadCounter(FFX_PARAMETER_IN FfxInt32 iPxPos)
+    {
+        return r_counters[iPxPos];
+    }
+
+    // Returns the counter stored at index COUNTER_FRAME_INDEX_SINCE_LAST_RESET.
+    const FfxUInt32 FrameIndexSinceLastReset()
+    {
+        return LoadCounter(COUNTER_FRAME_INDEX_SINCE_LAST_RESET);
+    }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_DEPTH)
+// Read-only view of the input depth; only declared when its SRV slot is defined.
+Texture2D r_input_depth : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_DEPTH);
+
+// Returns the texel at iPxPos converted to a scalar float.
+FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos)
+{
+    const FfxFloat32 fDepth = r_input_depth[iPxPos];
+    return fDepth;
+}
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_MOTION_VECTORS)
+// Read-only view of the input motion vectors; only declared when its SRV slot is defined.
+Texture2D r_input_motion_vectors : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_MOTION_VECTORS);
+
+// Returns the motion vector at iPxDilatedMotionVectorPos multiplied by MotionVectorScale().
+// When FFX_FRAMEINTERPOLATION_OPTION_JITTERED_MOTION_VECTORS is enabled,
+// MotionVectorJitterCancellation() is subtracted from the scaled vector.
+FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos)
+{
+    const FfxFloat32x2 fRawVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy;
+    FfxFloat32x2 fScaledVector = fRawVector * MotionVectorScale();
+
+#if FFX_FRAMEINTERPOLATION_OPTION_JITTERED_MOTION_VECTORS
+    fScaledVector -= MotionVectorJitterCancellation();
+#endif
+
+    return fScaledVector;
+}
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD)
+    // Read-only view of the distortion field; only declared when its SRV slot is defined.
+    Texture2D r_input_distortion_field : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD);
+
+    // Samples the distortion field at fUv on mip level 0 through the s_LinearClamp
+    // sampler and returns the result as a two-component vector.
+    FfxFloat32x2 SampleDistortionField(FFX_PARAMETER_IN FfxFloat32x2 fUv)
+    {
+        const FfxFloat32x2 fDistortion = r_input_distortion_field.SampleLevel(s_LinearClamp, fUv, 0);
+        return fDistortion;
+    }
+#endif
+
+///////////////////////////////////////////////
+// declare UAVs and UAV accessors
+///////////////////////////////////////////////
+#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_MASK) && defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT)
+    // Split layout: RGB goes to rw_output and the fourth channel to rw_inpainting_mask.
+    // RWTexture2D has no default element type, so both are declared explicitly to match
+    // the val.rgb / val.a stores below.
+    RWTexture2D<FfxFloat32x3> rw_output : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT);
+    RWTexture2D<FfxFloat32> rw_inpainting_mask : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_MASK);
+
+    // Returns the output color in xyz and the inpainting mask value in w.
+    FfxFloat32x4 RWLoadFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return FfxFloat32x4(rw_output[iPxPos], rw_inpainting_mask[iPxPos]);
+    }
+
+    // Writes val.rgb to the output texture and val.a to the inpainting mask.
+    void StoreFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 val)
+    {
+        rw_output[iPxPos] = val.rgb;
+        rw_inpainting_mask[iPxPos] = val.a;
+    }
+
+#elif defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT)
+    // Combined layout: all four channels live in rw_output.
+    RWTexture2D<FfxFloat32x4> rw_output : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT);
+
+    // Returns the four-channel output texel at iPxPos.
+    FfxFloat32x4 RWLoadFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return rw_output[iPxPos];
+    }
+
+    // Writes all four channels of val to the output texture.
+    void StoreFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 val)
+    {
+        rw_output[iPxPos] = val;
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_MOTION_VECTORS
+    // RWTexture2D has no default element type; the accessors read and write two-component vectors.
+    RWTexture2D<FfxFloat32x2> rw_dilated_motion_vectors : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_MOTION_VECTORS);
+
+    // Returns the motion vector stored at iPxPos.
+    FfxFloat32x2 RWLoadDilatedMotionVectors(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return rw_dilated_motion_vectors[iPxPos];
+    }
+
+    // Overwrites the motion vector stored at iPxPos with val.
+    void StoreDilatedMotionVectors(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 val)
+    {
+        rw_dilated_motion_vectors[iPxPos] = val;
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_DEPTH
+    // RWTexture2D has no default element type; the accessors read and write scalar floats.
+    RWTexture2D<FfxFloat32> rw_dilated_depth : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_DEPTH);
+
+    // Returns the depth value stored at iPxPos.
+    FfxFloat32 RWLoadDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return rw_dilated_depth[iPxPos];
+    }
+
+    // Overwrites the depth value stored at iPxPos with val.
+    void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 val)
+    {
+        rw_dilated_depth[iPxPos] = val;
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME
+    // Depth is stored as the raw bit pattern of a float so it can be updated with integer
+    // atomics; InterlockedMin/Max need an integer element type and RWTexture2D has no
+    // default one, so it is declared explicitly.
+    RWTexture2D<FfxUInt32> rw_reconstructed_depth_previous_frame : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME);
+
+    // Returns the depth at iPxPos, reinterpreting the stored bits as a float.
+    FfxFloat32 RWLoadReconstructedDepthPreviousFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return ffxAsFloat(rw_reconstructed_depth_previous_frame[iPxPos]);
+    }
+
+    // Atomically merges fDepth into the texel at iPxSample:
+    // min for standard depth, max for inverted depth.
+    void UpdateReconstructedDepthPreviousFrame(FfxInt32x2 iPxSample, FfxFloat32 fDepth)
+    {
+        const FfxUInt32 uDepth = ffxAsUInt32(fDepth);
+
+#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH
+        InterlockedMax(rw_reconstructed_depth_previous_frame[iPxSample], uDepth);
+#else
+        InterlockedMin(rw_reconstructed_depth_previous_frame[iPxSample], uDepth);
+#endif
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME
+    // Depth is stored as the raw bit pattern of a float so it can be updated with integer
+    // atomics; InterlockedMin/Max need an integer element type and RWTexture2D has no
+    // default one, so it is declared explicitly.
+    RWTexture2D<FfxUInt32> rw_reconstructed_depth_interpolated_frame : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME);
+
+    // Returns the depth at iPxPos, reinterpreting the stored bits as a float.
+    FfxFloat32 RWLoadReconstructedDepthInterpolatedFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return ffxAsFloat(rw_reconstructed_depth_interpolated_frame[iPxPos]);
+    }
+
+    // Overwrites the texel at iPxPos with the bit pattern of value (non-atomic store).
+    void StoreReconstructedDepthInterpolatedFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 value)
+    {
+        rw_reconstructed_depth_interpolated_frame[iPxPos] = ffxAsUInt32(value);
+    }
+
+    // Atomically merges fDepth into the texel at iPxSample:
+    // min for standard depth, max for inverted depth.
+    void UpdateReconstructedDepthInterpolatedFrame(FfxInt32x2 iPxSample, FfxFloat32 fDepth)
+    {
+        const FfxUInt32 uDepth = ffxAsUInt32(fDepth);
+
+#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH
+        InterlockedMax(rw_reconstructed_depth_interpolated_frame[iPxSample], uDepth);
+#else
+        InterlockedMin(rw_reconstructed_depth_interpolated_frame[iPxSample], uDepth);
+#endif
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DISOCCLUSION_MASK
+    // RWTexture2D has no default element type; the accessors read and write two-component vectors.
+    RWTexture2D<FfxFloat32x2> rw_disocclusion_mask : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_DISOCCLUSION_MASK);
+
+    // Returns the mask value stored at iPxPos.
+    FfxFloat32x2 RWLoadDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return rw_disocclusion_mask[iPxPos];
+    }
+
+    // Overwrites the mask value stored at iPxPos with val.
+    void StoreDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 val)
+    {
+        rw_disocclusion_mask[iPxPos] = val;
+    }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_X) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_Y)
+
+    // Packed 32-bit vector field entries, one texture per vector component. InterlockedMax
+    // needs an integer element type and RWTexture2D has no default one, so it is declared
+    // explicitly.
+    RWTexture2D<FfxUInt32> rw_game_motion_vector_field_x : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_X);
+    RWTexture2D<FfxUInt32> rw_game_motion_vector_field_y : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_Y);
+
+    // Returns the packed X entry stored at iPxPos.
+    FfxUInt32 RWLoadGameMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return rw_game_motion_vector_field_x[iPxPos];
+    }
+
+    // Overwrites the packed X entry at iPxPos (non-atomic store).
+    void StoreGameMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val)
+    {
+        rw_game_motion_vector_field_x[iPxPos] = val;
+    }
+
+    // Returns the packed Y entry stored at iPxPos.
+    FfxUInt32 RWLoadGameMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return rw_game_motion_vector_field_y[iPxPos];
+    }
+
+    // Overwrites the packed Y entry at iPxPos (non-atomic store).
+    void StoreGameMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val)
+    {
+        rw_game_motion_vector_field_y[iPxPos] = val;
+    }
+
+    // Atomically keeps the larger of the stored and the supplied packed value, per component.
+    void UpdateGameMotionVectorField(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector)
+    {
+        InterlockedMax(rw_game_motion_vector_field_x[iPxPos], packedVector.x);
+        InterlockedMax(rw_game_motion_vector_field_y[iPxPos], packedVector.y);
+    }
+
+    // Same atomic update as UpdateGameMotionVectorField, but also returns the larger of
+    // the two values that were stored before the update.
+    FfxUInt32 UpdateGameMotionVectorFieldEx(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector)
+    {
+        FfxUInt32 uPreviousValueX = 0;
+        FfxUInt32 uPreviousValueY = 0;
+        InterlockedMax(rw_game_motion_vector_field_x[iPxPos], packedVector.x, uPreviousValueX);
+        InterlockedMax(rw_game_motion_vector_field_y[iPxPos], packedVector.y, uPreviousValueY);
+
+        return ffxMax(uPreviousValueX, uPreviousValueY);
+    }
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y)
+
+    // Packed 32-bit vector field entries, one texture per vector component. InterlockedMax
+    // needs an integer element type and RWTexture2D has no default one, so it is declared
+    // explicitly.
+    RWTexture2D<FfxUInt32> rw_optical_flow_motion_vector_field_x : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X);
+    RWTexture2D<FfxUInt32> rw_optical_flow_motion_vector_field_y : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y);
+
+    // Returns the packed X entry stored at iPxPos.
+    FfxUInt32 RWLoadOpticalflowMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return rw_optical_flow_motion_vector_field_x[iPxPos];
+    }
+
+    // Overwrites the packed X entry at iPxPos (non-atomic store).
+    void StoreOpticalflowMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val)
+    {
+        rw_optical_flow_motion_vector_field_x[iPxPos] = val;
+    }
+
+    // Returns the packed Y entry stored at iPxPos.
+    FfxUInt32 RWLoadOpticalflowMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+    {
+        return rw_optical_flow_motion_vector_field_y[iPxPos];
+    }
+
+    // Overwrites the packed Y entry at iPxPos (non-atomic store).
+    void StoreOpticalflowMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val)
+    {
+        rw_optical_flow_motion_vector_field_y[iPxPos] = val;
+    }
+
+    // Atomically keeps the larger of the stored and the supplied packed value, per component.
+    void UpdateOpticalflowMotionVectorField(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector)
+    {
+        InterlockedMax(rw_optical_flow_motion_vector_field_x[iPxPos], packedVector.x);
+        InterlockedMax(rw_optical_flow_motion_vector_field_y[iPxPos], packedVector.y);
+    }
+#endif
+
+#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_COUNTERS
+    // RWStructuredBuffer has no default element type; the counters are FfxUInt32 values
+    // updated with InterlockedAdd.
+    globallycoherent RWStructuredBuffer<FfxUInt32> rw_counters : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_COUNTERS);
+
+    // Returns the counter stored at element index iPxPos.
+    FfxUInt32 RWLoadCounter(FFX_PARAMETER_IN FfxInt32 iPxPos)
+    {
+        return rw_counters[iPxPos];
+    }
+
+    // Overwrites the counter at element index iPxPos (non-atomic store).
+    void StoreCounter(FFX_PARAMETER_IN FfxInt32 iPxPos, FFX_PARAMETER_IN FfxUInt32 counter)
+    {
+        rw_counters[iPxPos] = counter;
+    }
+
+    // Atomically adds 1 to the counter at iPxPos and returns its previous value in oldVal.
+    void AtomicIncreaseCounter(FFX_PARAMETER_IN FfxInt32 iPxPos, FFX_PARAMETER_OUT FfxUInt32 oldVal)
+    {
+        InterlockedAdd(rw_counters[iPxPos], 1, oldVal);
+    }
+#endif
+
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_0) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_1) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_2) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_3) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_4) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_5) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_6) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_7) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_8) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_9) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_10) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_11) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_12)
+
+    // One writable view per mip level (0..12) of the inpainting pyramid. RWTexture2D has
+    // no default element type; the accessors below read and write four-component floats.
+    // Only the level 5 view is declared globallycoherent.
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid0 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_0);
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid1 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_1);
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid2 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_2);
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid3 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_3);
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid4 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_4);
+    globallycoherent RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid5 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_5);
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid6 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_6);
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid7 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_7);
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid8 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_8);
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid9 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_9);
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid10 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_10);
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid11 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_11);
+    RWTexture2D<FfxFloat32x4> rw_inpainting_pyramid12 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_12);
+
+
+    // Returns the texel at iPxPos from pyramid level `index` (0..12).
+    // Any other index yields zero.
+    FfxFloat32x4 RWLoadInpaintingPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 index)
+    {
+        // The views are separate variables, so the level is selected with an if-chain.
+        #define LOAD(idx)                                  \
+            if (index == idx)                              \
+            {                                              \
+                return rw_inpainting_pyramid##idx[iPxPos]; \
+            }
+        LOAD(0);
+        LOAD(1);
+        LOAD(2);
+        LOAD(3);
+        LOAD(4);
+        LOAD(5);
+        LOAD(6);
+        LOAD(7);
+        LOAD(8);
+        LOAD(9);
+        LOAD(10);
+        LOAD(11);
+        LOAD(12);
+        return 0;
+
+        #undef LOAD
+    }
+
+    // Writes outValue to the texel at iPxPos of pyramid level `index` (0..12).
+    // Any other index stores nothing.
+    void StoreInpaintingPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 outValue, FFX_PARAMETER_IN FfxUInt32 index)
+    {
+        #define STORE(idx)                                     \
+            if (index == idx)                                  \
+            {                                                  \
+                rw_inpainting_pyramid##idx[iPxPos] = outValue; \
+            }
+
+        STORE(0);
+        STORE(1);
+        STORE(2);
+        STORE(3);
+        STORE(4);
+        STORE(5);
+        STORE(6);
+        STORE(7);
+        STORE(8);
+        STORE(9);
+        STORE(10);
+        STORE(11);
+        STORE(12);
+
+        #undef STORE
+    }
+#endif
+
+#endif // #if defined(FFX_GPU)
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_common.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_common.h
new file mode 100644
index 000000000000..8206bf43bca1
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_common.h
@@ -0,0 +1,445 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#if !defined(FFX_FRAMEINTERPOLATION_COMMON_H)
+#define FFX_FRAMEINTERPOLATION_COMMON_H
+
+// Bit flags (bits 0..2) selecting debug drawing options for a dispatch.
+#define FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES (1 << 0)
+#define FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_RESET_INDICATORS (1 << 1)
+#define FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW (1 << 2)
+
+// Shared float constants: a small tolerance value plus the largest finite and the
+// smallest normalized positive 32-bit float.
+FFX_STATIC const FfxFloat32 FFX_FRAMEINTERPOLATION_EPSILON = 1e-03f;
+FFX_STATIC const FfxFloat32 FFX_FRAMEINTERPOLATION_FLT_MAX = 3.402823466e+38f;
+FFX_STATIC const FfxFloat32 FFX_FRAMEINTERPOLATION_FLT_MIN = 1.175494351e-38f;
+
+// Bilinear weight threshold for reconstructed depth; set equal to the shared epsilon.
+FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = FFX_FRAMEINTERPOLATION_EPSILON;
+
+// Weighted sum of the RGB channels using the weights (0.2126, 0.7152, 0.0722).
+FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb)
+{
+    const FfxFloat32x3 fLumaWeights = FfxFloat32x3(0.2126f, 0.7152f, 0.0722f);
+    return dot(fLinearRgb, fLumaWeights);
+}
+
+// Weighted sum of the RGB channels using the weights (0.2627, 0.678, 0.0593).
+FfxFloat32 LinearRec2020ToLuminance(FfxFloat32x3 linearRec2020RGB)
+{
+    return 0.2627 * linearRec2020RGB.x + 0.678 * linearRec2020RGB.y + 0.0593 * linearRec2020RGB.z;
+}
+
+// Remaps an scRGB value into the [minLuminance, maxLuminance] range: both bounds are
+// divided by 80, the lower bound is subtracted and the result is divided by the span.
+FfxFloat32x3 ffxscRGBToLinear(FfxFloat32x3 value, FfxFloat32 minLuminance, FfxFloat32 maxLuminance)
+{
+    const FfxFloat32x3 fOffset = ffxBroadcast3(minLuminance / 80.0f);
+    const FfxFloat32x3 fRange = ffxBroadcast3((maxLuminance - minLuminance) / 80.0f);
+    return (value - fOffset) / fRange;
+}
+
+// Converts a raw backbuffer color to linear RGB according to BackBufferTransferFunction():
+//   0 -> ffxLinearFromSrgb
+//   1 -> ffxLinearFromPQ, rescaled by 10000 / MaxLuminance()
+//   2 -> ffxscRGBToLinear with MinLuminance() / MaxLuminance()
+// Any other value yields black, so the result is never read uninitialized; this matches
+// RawRGBToLuminance(), which reports 0 for unknown transfer functions.
+FfxFloat32x3 RawRGBToLinear(FfxFloat32x3 fRawRgb)
+{
+    FfxFloat32x3 fLinearRgb = FfxFloat32x3(0.0f, 0.0f, 0.0f);
+
+    switch (BackBufferTransferFunction())
+    {
+    case 0:
+        fLinearRgb = ffxLinearFromSrgb(fRawRgb);
+        break;
+    case 1:
+        fLinearRgb = ffxLinearFromPQ(fRawRgb) * (10000.0f / MaxLuminance());
+        break;
+    case 2:
+        fLinearRgb = ffxscRGBToLinear(fRawRgb, MinLuminance(), MaxLuminance());
+        break;
+    }
+
+    return fLinearRgb;
+}
+
+// Luminance of a raw backbuffer color. The color is linearized with RawRGBToLinear();
+// transfer functions 0 and 2 then use RGBToLuma, transfer function 1 uses
+// LinearRec2020ToLuminance. Unknown transfer functions yield 0.
+FfxFloat32 RawRGBToLuminance(FfxFloat32x3 fRawRgb)
+{
+    FfxFloat32 fResult = 0.0f;
+
+    switch (BackBufferTransferFunction())
+    {
+    case 0:
+    case 2:
+        fResult = RGBToLuma(RawRGBToLinear(fRawRgb));
+        break;
+    case 1:
+        fResult = LinearRec2020ToLuminance(RawRGBToLinear(fRawRgb));
+        break;
+    }
+
+    return fResult;
+}
+
+// Maps the luminance of a raw color through a piecewise curve: linear
+// (x * 24389 / 27) up to 216 / 24389, cube root based (116 * x^(1/3) - 16) above it.
+// The result is scaled by 0.01.
+FfxFloat32 RawRGBToPerceivedLuma(FfxFloat32x3 fRawRgb)
+{
+    const FfxFloat32 fLuma = RawRGBToLuminance(fRawRgb);
+
+    FfxFloat32 fPerceived = 0;
+    if (fLuma <= 216.0f / 24389.0f)
+    {
+        // Linear segment for dark values.
+        fPerceived = fLuma * (24389.0f / 27.0f);
+    }
+    else
+    {
+        fPerceived = ffxPow(fLuma, 1.0f / 3.0f) * 116.0f - 16.0f;
+    }
+
+    return fPerceived * 0.01f;
+}
+
+// Describes the 2x2 texel footprint of one bilinear tap, as filled in by
+// GetBilinearSamplingData().
+struct BilinearSamplingData
+{
+ // Offsets of the four texels relative to iBasePos.
+ FfxInt32x2 iOffsets[4];
+ // Bilinear weight of each texel, in the same order as iOffsets.
+ FfxFloat32 fWeights[4];
+ // Integer coordinate of the first texel of the footprint.
+ FfxInt32x2 iBasePos;
+};
+
+// Builds the 2x2 bilinear footprint for sampling a texture of iSize texels at fUv.
+// The base texel is floor(fUv * iSize - 0.5); the weights come from the fractional part.
+BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize)
+{
+    // Shift by half a texel so integer coordinates land on texel centers.
+    const FfxFloat32x2 fTexelPos = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f);
+    const FfxFloat32x2 fFrac = ffxFract(fTexelPos);
+
+    BilinearSamplingData result;
+    result.iBasePos = FfxInt32x2(floor(fTexelPos));
+
+    // Footprint order: top-left, top-right, bottom-left, bottom-right.
+    result.iOffsets[0] = FfxInt32x2(0, 0);
+    result.fWeights[0] = (1 - fFrac.x) * (1 - fFrac.y);
+
+    result.iOffsets[1] = FfxInt32x2(1, 0);
+    result.fWeights[1] = (fFrac.x) * (1 - fFrac.y);
+
+    result.iOffsets[2] = FfxInt32x2(0, 1);
+    result.fWeights[2] = (1 - fFrac.x) * (fFrac.y);
+
+    result.iOffsets[3] = FfxInt32x2(1, 1);
+    result.fWeights[3] = (fFrac.x) * (fFrac.y);
+
+    return result;
+}
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION)
+// Converts a device depth value to view space depth using the first two components of
+// DeviceToViewSpaceTransformFactors(): factors[1] / (depth - factors[0]).
+FfxFloat32 ConvertFromDeviceDepthToViewSpace(FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32x4 fFactors = DeviceToViewSpaceTransformFactors();
+    const FfxFloat32 fDenominator = fDeviceDepth - fFactors[0];
+    return fFactors[1] / fDenominator;
+}
+
+// Maps a pixel position inside an iSize viewport to [-1, 1] coordinates with Y flipped:
+// pixel (0, 0) maps to (-1, 1).
+FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize)
+{
+    const FfxFloat32x2 fNormalized = fPxPos / FfxFloat32x2(iSize);
+    return fNormalized * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f);
+}
+
+// Reconstructs a view space position from a viewport pixel and its device depth.
+// Z comes from ConvertFromDeviceDepthToViewSpace(); X and Y are the NDC coordinates
+// scaled by Z and by components 2 and 3 of DeviceToViewSpaceTransformFactors().
+FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32x4 fFactors = DeviceToViewSpaceTransformFactors();
+    const FfxFloat32 fViewZ = ConvertFromDeviceDepthToViewSpace(fDeviceDepth);
+    const FfxFloat32x2 fNdc = ComputeNdc(iViewportPos, iViewportSize);
+
+    const FfxFloat32 fViewX = fFactors[2] * fNdc.x * fViewZ;
+    const FfxFloat32 fViewY = fFactors[3] * fNdc.y * fViewZ;
+
+    return FfxFloat32x3(fViewX, fViewY, fViewZ);
+}
+#endif
+
+// True when pos lies inside [0, size) on both axes. The comparison is done on unsigned
+// values, so negative coordinates wrap to large numbers and fail the test.
+FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size)
+{
+    const FfxUInt32x2 uPos = FfxUInt32x2(pos);
+    const FfxUInt32x2 uSize = FfxUInt32x2(size);
+    return all(FFX_LESS_THAN(uPos, uSize));
+}
+
+// True when both UV components lie strictly between 0 and 1 (the borders are excluded).
+FfxBoolean IsUvInside(FfxFloat32x2 fUv)
+{
+    const FfxBoolean bInsideX = (fUv.x > 0.0f && fUv.x < 1.0f);
+    const FfxBoolean bInsideY = (fUv.y > 0.0f && fUv.y < 1.0f);
+    return bInsideX && bInsideY;
+}
+
+// True when pos lies inside the rectangle starting at iRectCorner (inclusive) and
+// extending iRectSize texels along each axis (end exclusive).
+FfxBoolean IsInRect(FfxInt32x2 pos, FfxInt32x2 iRectCorner, FfxInt32x2 iRectSize)
+{
+    const FfxBoolean bInsideX = pos.x >= iRectCorner.x && pos.x < (iRectSize.x + iRectCorner.x);
+    const FfxBoolean bInsideY = pos.y >= iRectCorner.y && pos.y < (iRectSize.y + iRectCorner.y);
+    return (bInsideX && bInsideY);
+}
+
+// Ratio of the smaller to the larger input; 0 when the larger input is exactly zero.
+FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1)
+{
+    const FfxFloat32 fLargest = ffxMax(v0, v1);
+    const FfxFloat32 fSmallest = ffxMin(v0, v1);
+    return fLargest != 0 ? fSmallest / fLargest : 0;
+}
+
+// Dot product of two 3D vectors after dividing both by the length of the longer one.
+// Returns 1 when both vectors have zero length.
+FfxFloat32 NormalizedDot3(const FfxFloat32x3 v0, const FfxFloat32x3 v1)
+{
+    const FfxFloat32 fLongest = ffxMax(length(v0), length(v1));
+
+    return fLongest > 0.0f ? dot(v0 / fLongest, v1 / fLongest) : 1.0f;
+}
+
+// Dot product of two 2D vectors after dividing both by the length of the longer one.
+// Returns 1 when both vectors have zero length.
+FfxFloat32 NormalizedDot2(const FfxFloat32x2 v0, const FfxFloat32x2 v1)
+{
+    const FfxFloat32 fLongest = ffxMax(length(v0), length(v1));
+
+    return fLongest > 0.0f ? dot(v0 / fLongest, v1 / fLongest) : 1.0f;
+}
+
+// Per channel, computes (1 - min/max) / 0.1 clamped to [0, 1] for the two colors and
+// returns the largest of the three channel values: 0 when every channel matches,
+// 1 once any channel differs by 10% or more.
+FfxFloat32 CalculateStaticContentFactor(FfxFloat32x3 fCurrentInterpolationSource, FfxFloat32x3 fPresentColor)
+{
+    const FfxFloat32 fDiffR = ffxSaturate((1.0f - MinDividedByMax(fCurrentInterpolationSource.r, fPresentColor.r)) / 0.1f);
+    const FfxFloat32 fDiffG = ffxSaturate((1.0f - MinDividedByMax(fCurrentInterpolationSource.g, fPresentColor.g)) / 0.1f);
+    const FfxFloat32 fDiffB = ffxSaturate((1.0f - MinDividedByMax(fCurrentInterpolationSource.b, fPresentColor.b)) / 0.1f);
+
+    const FfxFloat32x3 fFactor = ffxSaturate(FfxFloat32x3(fDiffR, fDiffG, fDiffB));
+
+    return max(fFactor.x, max(fFactor.y, fFactor.z));
+}
+
+//
+// MOTION VECTOR FIELD
+//
+
+// Layout of one packed 32-bit motion vector field entry, as derived from the constants
+// below (least significant bit first):
+//   bits  0..15  vector coefficient
+//   bits 16..20  low priority factor
+//   bits 21..30  high priority factor
+//   bit  31      primary vector indication
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_FIELD_ENTRY_BIT_COUNT = 32;
+
+// Make sure all bit counts add up to MOTION_VECTOR_FIELD_ENTRY_BIT_COUNT
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_FIELD_VECTOR_COEFFICIENT_BIT_COUNT = 16;
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_FIELD_PRIORITY_LOW_BIT_COUNT = 5;
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_FIELD_PRIORITY_HIGH_BIT_COUNT = 10;
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_PRIMARY_VECTOR_INDICATION_BIT_COUNT = 1;
+
+// Mask of the top bit of an entry, marking it as a primary vector.
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_FIELD_PRIMARY_VECTOR_INDICATION_BIT = (1U << (MOTION_VECTOR_FIELD_ENTRY_BIT_COUNT - 1));
+
+// Largest values representable in the low / high priority bit-fields (31 and 1023).
+FFX_STATIC const FfxUInt32 PRIORITY_LOW_MAX = (1U << MOTION_VECTOR_FIELD_PRIORITY_LOW_BIT_COUNT) - 1;
+FFX_STATIC const FfxUInt32 PRIORITY_HIGH_MAX = (1U << MOTION_VECTOR_FIELD_PRIORITY_HIGH_BIT_COUNT) - 1;
+
+// Bit offsets of the individual fields inside an entry (16, 21 and 31).
+FFX_STATIC const FfxUInt32 PRIORITY_LOW_OFFSET = MOTION_VECTOR_FIELD_VECTOR_COEFFICIENT_BIT_COUNT;
+FFX_STATIC const FfxUInt32 PRIORITY_HIGH_OFFSET = PRIORITY_LOW_OFFSET + MOTION_VECTOR_FIELD_PRIORITY_LOW_BIT_COUNT;
+FFX_STATIC const FfxUInt32 PRIMARY_VECTOR_INDICATION_OFFSET = PRIORITY_HIGH_OFFSET + MOTION_VECTOR_FIELD_PRIORITY_HIGH_BIT_COUNT;
+
+// Unpacked form of a motion vector field entry plus values derived while sampling it.
+struct VectorFieldEntry
+{
+ // Motion vector decoded from the two packed entries.
+ FfxFloat32x2 fMotionVector;
+ // Priority factors; stored as floats despite the 'u' prefix (normalized to [0, 1]
+ // by UnpackVectorFieldEntries).
+ FfxFloat32 uHighPriorityFactor;
+ FfxFloat32 uLowPriorityFactor;
+ // Set by UnpackVectorFieldEntries when the high priority factor is greater than 0.
+ FfxBoolean bValid;
+ // Primary: the indication bit of the packed entry is set.
+ // Secondary: valid but not primary.
+ FfxBoolean bPrimary;
+ FfxBoolean bSecondary;
+ // Set when the vector was filled in from the inpainting pyramid.
+ FfxBoolean bInPainted;
+ // Length of fMotionVector.
+ FfxFloat32 fVelocity;
+ // Set when (uv - vector) / (uv + vector) falls outside the open (0, 1) UV range.
+ FfxBoolean bNegOutside;
+ FfxBoolean bPosOutside;
+};
+
+// Returns an entry with every numeric field set to zero and every flag cleared.
+VectorFieldEntry NewVectorFieldEntry()
+{
+    VectorFieldEntry entry;
+
+    // Numeric fields.
+    entry.fMotionVector = FfxFloat32x2(0.0, 0.0);
+    entry.fVelocity = 0.0;
+    entry.uHighPriorityFactor = 0.0;
+    entry.uLowPriorityFactor = 0.0;
+
+    // Flags.
+    entry.bValid = false;
+    entry.bPrimary = false;
+    entry.bSecondary = false;
+    entry.bInPainted = false;
+    entry.bNegOutside = false;
+    entry.bPosOutside = false;
+
+    return entry;
+}
+
+// True when the primary vector indication bit of a packed entry is set.
+FfxBoolean PackedVectorFieldEntryIsPrimary(FfxUInt32 packedEntry)
+{
+    const FfxUInt32 uPrimaryBit = packedEntry & MOTION_VECTOR_FIELD_PRIMARY_VECTOR_INDICATION_BIT;
+    return (uPrimaryBit != 0);
+}
+
+// Packs a motion vector into two 32-bit entries (one per component). Both entries share
+// the same upper bits (primary flag, high priority, low priority); the lower bits hold
+// the result of ffxF32ToF16 for the respective component. Priority factors are masked
+// to the width of their bit-field.
+FfxUInt32x2 PackVectorFieldEntries(FfxBoolean bIsPrimary, FfxUInt32 uHighPriorityFactor, FfxUInt32 uLowPriorityFactor, FfxFloat32x2 fMotionVector)
+{
+    const FfxUInt32 uPrimaryBits = FfxUInt32(bIsPrimary) * MOTION_VECTOR_FIELD_PRIMARY_VECTOR_INDICATION_BIT;
+    const FfxUInt32 uHighBits = (uHighPriorityFactor & PRIORITY_HIGH_MAX) << PRIORITY_HIGH_OFFSET;
+    const FfxUInt32 uLowBits = (uLowPriorityFactor & PRIORITY_LOW_MAX) << PRIORITY_LOW_OFFSET;
+    const FfxUInt32 uHeader = uPrimaryBits | uHighBits | uLowBits;
+
+    return FfxUInt32x2(uHeader | ffxF32ToF16(fMotionVector.x),
+                       uHeader | ffxF32ToF16(fMotionVector.y));
+}
+
+// Decodes a pair of packed vector field entries into vfElement.
+// Priority factors and the primary flag are taken from packed.x; the motion vector X and Y
+// come from packed.x and packed.y respectively. Priority factors are normalized to [0, 1].
+// Fields that cannot be derived from the packed data (fVelocity, bNegOutside, bPosOutside,
+// bInPainted) are reset to their NewVectorFieldEntry() defaults.
+void UnpackVectorFieldEntries(FfxUInt32x2 packed, out VectorFieldEntry vfElement)
+{
+    // Start from a fully initialized entry so no field of the out parameter is left
+    // undefined when the caller copies it back.
+    vfElement = NewVectorFieldEntry();
+
+    vfElement.uHighPriorityFactor = FfxFloat32((packed.x >> PRIORITY_HIGH_OFFSET) & PRIORITY_HIGH_MAX) / PRIORITY_HIGH_MAX;
+    vfElement.uLowPriorityFactor = FfxFloat32((packed.x >> PRIORITY_LOW_OFFSET) & PRIORITY_LOW_MAX) / PRIORITY_LOW_MAX;
+
+    vfElement.bPrimary = PackedVectorFieldEntryIsPrimary(packed.x);
+    vfElement.bValid = (vfElement.uHighPriorityFactor > 0.0f);
+    vfElement.bSecondary = vfElement.bValid && !vfElement.bPrimary;
+
+    // Reverse priority factor for secondary vectors
+    if (vfElement.bSecondary)
+    {
+        vfElement.uHighPriorityFactor = 1.0f - vfElement.uHighPriorityFactor;
+    }
+
+    vfElement.fMotionVector.x = ffxUnpackF32(packed.x).x;
+    vfElement.fMotionVector.y = ffxUnpackF32(packed.y).x;
+}
+
+//
+// MOTION VECTOR FIELD
+//
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID)
+// Bilinearly gathers one mip level of the inpainting pyramid at fUv.
+// Each of the four footprint texels that lies inside iTexSize contributes with its
+// bilinear weight multiplied by its z channel, so texels whose z is not greater than 0
+// are ignored. The accumulated value is divided by the weight sum; when nothing
+// contributed the result is all zeros.
+FfxFloat32x4 ComputeMvInpaintingLevel(FfxFloat32x2 fUv, const FfxInt32 iMipLevel, const FfxInt32x2 iTexSize)
+{
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUv, iTexSize);
+
+    FfxFloat32 fSum = 0.0f;
+    FfxFloat32x4 fColor = FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++)
+    {
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+        if (IsOnScreen(iSamplePos, iTexSize))
+        {
+            FfxFloat32x4 fSample = LoadInpaintingPyramid(iMipLevel, iSamplePos);
+
+            // The z channel acts both as validity flag (> 0) and as weight.
+            const FfxFloat32 fPriorityFactor = fSample.z;
+            const FfxFloat32 fValidMvFactor = FfxFloat32(fSample.z > 0);
+            const FfxFloat32 fSampleWeight = bilinearInfo.fWeights[iSampleIndex] * fValidMvFactor * fPriorityFactor;
+
+            fSum += fSampleWeight;
+            fColor += fSample * fSampleWeight;
+        }
+    }
+
+    // Avoid dividing by zero when no texel contributed.
+    fColor /= (fSum > 0.0f) ? fSum : 1.0f;
+
+    return fColor;
+}
+#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y)
+
+// Loads the game motion vector field entry covering fUv. When that entry is not valid,
+// the vector is taken from the inpainting pyramid instead: up to 11 mip levels are
+// visited, halving the texture size for each, until a level returns a non-zero w
+// channel. Finally the outside flags and the velocity are derived from the vector.
+void LoadInpaintedGameFieldMv(FfxFloat32x2 fUv, out VectorFieldEntry vfElement)
+{
+    const FfxInt32x2 iPxSample = FfxInt32x2(fUv * RenderSize());
+    UnpackVectorFieldEntries(LoadGameFieldMv(iPxSample), vfElement);
+
+    if (!vfElement.bValid)
+    {
+        FfxInt32x2 iLevelSize = RenderSize();
+        FfxFloat32x4 fInpainted = FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+
+        for (FfxInt32 iMipLevel = 0; iMipLevel < 11; iMipLevel++)
+        {
+            // Stop as soon as a level produced a result.
+            if (fInpainted.w != 0.0f)
+            {
+                break;
+            }
+
+            iLevelSize /= 2;
+            fInpainted = ComputeMvInpaintingLevel(fUv, iMipLevel, iLevelSize);
+        }
+
+        vfElement.fMotionVector = fInpainted.xy;
+        vfElement.uHighPriorityFactor = fInpainted.z;
+        vfElement.uLowPriorityFactor = fInpainted.w;
+        vfElement.bInPainted = true;
+    }
+
+    vfElement.fVelocity = length(vfElement.fMotionVector);
+    vfElement.bNegOutside = !IsUvInside(fUv - vfElement.fMotionVector);
+    vfElement.bPosOutside = !IsUvInside(fUv + vfElement.fMotionVector);
+}
+#endif
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y) && \
+ defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION)
+// Bilinearly samples the optical flow motion vector field at fUv.
+// The motion vector and both priority factors of the four footprint texels that lie
+// inside the field are blended with their bilinear weights and renormalized by the
+// weight sum. The validity flags keep their NewVectorFieldEntry() defaults; the outside
+// flags and the velocity are derived from the blended vector.
+void SampleOpticalFlowMotionVectorField(FfxFloat32x2 fUv, out VectorFieldEntry vfElement)
+{
+    const FfxFloat32 scaleFactor = 1.0f;
+    const FfxInt32x2 iFieldSize = FfxInt32x2(GetOpticalFlowSize2() * scaleFactor);
+
+    const BilinearSamplingData footprint = GetBilinearSamplingData(fUv, iFieldSize);
+
+    vfElement = NewVectorFieldEntry();
+
+    FfxFloat32 fTotalWeight = 0.0f;
+    for (FfxInt32 iTap = 0; iTap < 4; iTap++)
+    {
+        const FfxInt32x2 iTapPos = footprint.iBasePos + footprint.iOffsets[iTap];
+        if (!IsOnScreen(iTapPos, iFieldSize))
+        {
+            continue;
+        }
+
+        const FfxFloat32 fTapWeight = footprint.fWeights[iTap];
+
+        VectorFieldEntry tapEntry = NewVectorFieldEntry();
+        FfxInt32x2 packedTap = FfxInt32x2(LoadOpticalFlowFieldMv(iTapPos));
+        UnpackVectorFieldEntries(packedTap, tapEntry);
+
+        vfElement.fMotionVector += tapEntry.fMotionVector * fTapWeight;
+        vfElement.uHighPriorityFactor += tapEntry.uHighPriorityFactor * fTapWeight;
+        vfElement.uLowPriorityFactor += tapEntry.uLowPriorityFactor * fTapWeight;
+
+        fTotalWeight += fTapWeight;
+    }
+
+    // Renormalize so taps that fell outside the field do not darken the result.
+    if (fTotalWeight > 0.0f)
+    {
+        vfElement.fMotionVector /= fTotalWeight;
+        vfElement.uHighPriorityFactor /= fTotalWeight;
+        vfElement.uLowPriorityFactor /= fTotalWeight;
+    }
+
+    vfElement.fVelocity = length(vfElement.fMotionVector);
+    vfElement.bNegOutside = !IsUvInside(fUv - vfElement.fMotionVector);
+    vfElement.bPosOutside = !IsUvInside(fUv + vfElement.fMotionVector);
+}
+#endif
+
+// Divides the color by (1 + its largest channel); the largest channel is floored at 0.
+FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb)
+{
+    return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx;
+}
+
+// Divides the color by (1 - its largest channel); the divisor is kept at or above
+// FFX_TONEMAP_EPSILON.
+FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb)
+{
+    return fRgb / ffxMax(FFX_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx;
+}
+
+// Maps a render resolution pixel to the display resolution pixel containing it:
+// the pixel center (with Jitter() subtracted) is rescaled from RenderSize() to
+// DisplaySize() and floored.
+FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos)
+{
+    const FfxFloat32x2 fUnjitteredCenter = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter();
+    const FfxFloat32x2 fDisplayPos = (fUnjitteredCenter / RenderSize()) * DisplaySize();
+    return FfxInt32x2(floor(fDisplayPos));
+}
+#if FFX_HALF
+// Reduced precision overload: same mapping as the 32-bit version, evaluated with the
+// FFX_MIN16 types.
+FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos)
+{
+    const FFX_MIN16_F2 fUnjitteredCenter = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter());
+    const FFX_MIN16_F2 fDisplayPos = (fUnjitteredCenter / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize());
+    return FFX_MIN16_I2(floor(fDisplayPos));
+}
+#endif
+
+#endif //!defined(FFX_FRAMEINTERPOLATION_COMMON_H)
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_game_vector_field_inpainting_pyramid.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_game_vector_field_inpainting_pyramid.h
new file mode 100644
index 000000000000..3c6132b65d61
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_game_vector_field_inpainting_pyramid.h
@@ -0,0 +1,121 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_COMPUTE_GAME_VECTOR_FIELD_INPAINTING_PYRAMID_H
+#define FFX_FRAMEINTERPOLATION_COMPUTE_GAME_VECTOR_FIELD_INPAINTING_PYRAMID_H
+
+#include "ffx_frameinterpolation_common.h"
+//--------------------------------------------------------------------------------------
+// Buffer definitions - global atomic counter
+//--------------------------------------------------------------------------------------
+
+FFX_GROUPSHARED FfxUInt32 spdCounter; // passed to AtomicIncreaseCounter() in SpdIncreaseAtomicCounter and returned by SpdGetAtomicCounter
+FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; // 16x16 scratch, x channel of SpdStoreIntermediate/SpdLoadIntermediate
+FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; // 16x16 scratch, y channel
+FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; // 16x16 scratch, z channel
+FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; // 16x16 scratch, w channel
+
+FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 tex, FfxUInt32 slice) // SPD callback: returns the unpacked game vector-field entry at tex; slice is not used.
+{
+ VectorFieldEntry gameMv;
+ FfxUInt32x2 packedGameFieldMv = LoadGameFieldMv(tex);
+ UnpackVectorFieldEntries(packedGameFieldMv, gameMv);
+ // Layout: xy = motion vector, z = high priority factor, w = low priority factor. The trailing factor is 1 when DisplaySize().x > 0 and 0 otherwise. NOTE(review): purpose of that factor is not visible here - confirm.
+ return FfxFloat32x4(gameMv.fMotionVector, gameMv.uHighPriorityFactor, gameMv.uLowPriorityFactor) * FfxFloat32(DisplaySize().x > 0);
+}
+
+FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) // SPD callback: reads back the inpainting pyramid at tex; slice is not used.
+{
+ return RWLoadInpaintingPyramid(tex, 5); // index 5 is hard-coded; presumably the level SPD re-reads for its final pass - confirm against ffx_spd.h
+}
+
+void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) // SPD callback: forwards outValue to StoreInpaintingPyramid at (pix, index); slice is not used.
+{
+ StoreInpaintingPyramid(pix, outValue, index);
+}
+
+void SpdIncreaseAtomicCounter(FfxUInt32 slice) // SPD callback: bumps the COUNTER_SPD counter; slice is not used.
+{
+ AtomicIncreaseCounter(COUNTER_SPD, spdCounter); // presumably writes the counter's previous value into spdCounter - confirm against the callback definition
+}
+
+FfxUInt32 SpdGetAtomicCounter() // SPD callback: returns the value currently held in spdCounter.
+{
+ return spdCounter;
+}
+void SpdResetAtomicCounter(FfxUInt32 slice) // SPD callback: stores 0 into the COUNTER_SPD counter; slice is not used.
+{
+ StoreCounter(COUNTER_SPD, 0);
+}
+
+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) // SPD callback: gathers the four per-channel scratch arrays at [x][y] into one vector.
+{
+ return FfxFloat32x4(
+ spdIntermediateR[x][y],
+ spdIntermediateG[x][y],
+ spdIntermediateB[x][y],
+ spdIntermediateA[x][y]);
+}
+
+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) // SPD callback: scatters value into the four per-channel scratch arrays at [x][y].
+{
+ spdIntermediateR[x][y] = value.x;
+ spdIntermediateG[x][y] = value.y;
+ spdIntermediateB[x][y] = value.z;
+ spdIntermediateA[x][y] = value.w;
+}
+
+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) // SPD callback: averages the samples whose z component is positive; returns zero when none is.
+{
+ FfxFloat32x4 vec = FfxFloat32x4(0,0,0,0);
+ // Each sample gets a binary weight: 1 when SAMPLE.z > 0 (z carries the high priority factor, see SpdLoadSourceImage), else 0.
+ FfxFloat32 fWeightSum = 0.0f;
+#define ADD(SAMPLE) { \
+ FfxFloat32 fWeight = FfxFloat32(SAMPLE.z > 0.0f); \
+ vec += SAMPLE * fWeight; \
+ fWeightSum += fWeight; \
+ }
+ // Accumulate the four inputs of the 2x2 reduction.
+ ADD(v0);
+ ADD(v1);
+ ADD(v2);
+ ADD(v3);
+#undef ADD
+ vec /= (fWeightSum > FFX_FRAMEINTERPOLATION_EPSILON) ? fWeightSum : 1.0f;
+ // With no contributing sample vec is still zero, so dividing by 1 leaves it at zero.
+ return vec;
+}
+
+#include "../spd/ffx_spd.h"
+
+void computeFrameinterpolationGameVectorFieldInpaintingPyramid(FfxInt32x3 iGroupId, FfxInt32 iLocalIndex) // Pass entry point: runs SpdDownsample with the Spd* callbacks defined above in this header.
+{
+ SpdDownsample(
+ FfxUInt32x2(iGroupId.xy),
+ FfxUInt32(iLocalIndex),
+ FfxUInt32(NumMips()),
+ FfxUInt32(NumWorkGroups()),
+ FfxUInt32(iGroupId.z),
+ FfxUInt32x2(WorkGroupOffset()));
+}
+
+#endif // FFX_FRAMEINTERPOLATION_COMPUTE_GAME_VECTOR_FIELD_INPAINTING_PYRAMID_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_inpainting_pyramid.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_inpainting_pyramid.h
new file mode 100644
index 000000000000..c9b3d7a73915
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_inpainting_pyramid.h
@@ -0,0 +1,120 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_COMPUTE_INPAINTING_PYRAMID_H
+#define FFX_FRAMEINTERPOLATION_COMPUTE_INPAINTING_PYRAMID_H
+
+//--------------------------------------------------------------------------------------
+// Buffer definitions - global atomic counter
+//--------------------------------------------------------------------------------------
+
+FFX_GROUPSHARED FfxUInt32 spdCounter; // passed to AtomicIncreaseCounter() in SpdIncreaseAtomicCounter and returned by SpdGetAtomicCounter
+FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; // 16x16 scratch, x channel of SpdStoreIntermediate/SpdLoadIntermediate
+FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; // 16x16 scratch, y channel
+FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; // 16x16 scratch, z channel
+FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; // 16x16 scratch, w channel
+
+FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 tex, FfxUInt32 slice) // SPD callback: returns the interpolated colour at tex with w turned into a sample weight; slice is not used.
+{
+ FfxFloat32x4 fColor = LoadFrameInterpolationOutput(tex) * FfxFloat32(DisplaySize().x > 0);
+ // The multiplier above is 1 when DisplaySize().x > 0 and 0 otherwise. NOTE(review): purpose not visible here - confirm.
+ // reverse sample weights
+ fColor.w = ffxSaturate(1.0f - fColor.w);
+ // After the inversion w == 0 marks a sample that SpdReduce4 below ignores.
+ // Texels outside [InterpolationRectBase, InterpolationRectBase + InterpolationRectSize) are given zero weight.
+ if (tex.x < InterpolationRectBase().x || tex.x >= (InterpolationRectSize().x + InterpolationRectBase().x) || tex.y < InterpolationRectBase().y ||
+ tex.y >= (InterpolationRectSize().y + InterpolationRectBase().y))
+ {
+ fColor.w = 0.0f; // don't take contributions from outside of the interpolation rect
+ }
+
+ return fColor;
+}
+
+FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) // SPD callback: reads back the inpainting pyramid at tex; slice is not used.
+{
+ return RWLoadInpaintingPyramid(tex, 5); // index 5 is hard-coded; presumably the level SPD re-reads for its final pass - confirm against ffx_spd.h
+}
+
+void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) // SPD callback: forwards outValue to StoreInpaintingPyramid at (pix, index); slice is not used.
+{
+ StoreInpaintingPyramid(pix, outValue, index);
+}
+
+void SpdIncreaseAtomicCounter(FfxUInt32 slice) // SPD callback: bumps the COUNTER_SPD counter; slice is not used.
+{
+ AtomicIncreaseCounter(COUNTER_SPD, spdCounter); // presumably writes the counter's previous value into spdCounter - confirm against the callback definition
+}
+
+FfxUInt32 SpdGetAtomicCounter() // SPD callback: returns the value currently held in spdCounter.
+{
+ return spdCounter;
+}
+void SpdResetAtomicCounter(FfxUInt32 slice) // SPD callback: stores 0 into the COUNTER_SPD counter; slice is not used.
+{
+ StoreCounter(COUNTER_SPD, 0);
+}
+
+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) // SPD callback: gathers the four per-channel scratch arrays at [x][y] into one vector.
+{
+ return FfxFloat32x4(
+ spdIntermediateR[x][y],
+ spdIntermediateG[x][y],
+ spdIntermediateB[x][y],
+ spdIntermediateA[x][y]);
+}
+
+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) // SPD callback: scatters value into the four per-channel scratch arrays at [x][y].
+{
+ spdIntermediateR[x][y] = value.x;
+ spdIntermediateG[x][y] = value.y;
+ spdIntermediateB[x][y] = value.z;
+ spdIntermediateA[x][y] = value.w;
+}
+
+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) // SPD callback: average of the four samples, each weighted by its own w component.
+{
+ FfxFloat32x4 w = FfxFloat32x4(v0.w, v1.w, v2.w, v3.w);
+ // Note that w is weighted by itself too, so the returned w is sum(w*w) / sum(w).
+ FfxFloat32 sum = (w[0] + w[1] + w[2] + w[3]);
+ // Nothing to average when all weights are zero: return zero instead of dividing by zero.
+ if (sum == 0.0f) {
+ return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+ }
+
+ return (v0 * w[0] + v1 * w[1] + v2 * w[2] + v3 * w[3]) / sum;
+}
+
+#include "../spd/ffx_spd.h"
+
+void computeFrameinterpolationInpaintingPyramid(FfxInt32x3 iGroupId, FfxInt32 iLocalIndex) // Pass entry point: runs SpdDownsample with the Spd* callbacks defined above in this header.
+{
+ SpdDownsample(
+ FfxUInt32x2(iGroupId.xy),
+ FfxUInt32(iLocalIndex),
+ FfxUInt32(NumMips()),
+ FfxUInt32(NumWorkGroups()),
+ FfxUInt32(iGroupId.z),
+ FfxUInt32x2(WorkGroupOffset()));
+}
+
+#endif // FFX_FRAMEINTERPOLATION_COMPUTE_INPAINTING_PYRAMID_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_debug_view.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_debug_view.h
new file mode 100644
index 000000000000..bcd3a52a7941
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_debug_view.h
@@ -0,0 +1,172 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_DEBUG_VIEW_H
+#define FFX_FRAMEINTERPOLATION_DEBUG_VIEW_H
+
+struct FfxFrameInterpolationDebugViewport // Pixel-space rectangle of one tile of the debug view.
+{
+ FfxInt32x2 offset; // inclusive minimum corner, in output pixels (see pointIsInsideViewport)
+ FfxInt32x2 size; // extent in pixels; offset + size is the exclusive maximum corner
+};
+
+// Calls function(pos, vp) only when pos lies inside viewport vp (per pointIsInsideViewport); does nothing otherwise.
+#define DRAW_VIEWPORT(function, pos, vp) \
+ { \
+ if (pointIsInsideViewport(pos, vp)) \
+ { \
+ function(pos, vp); \
+ } \
+ }
+
+FfxFloat32x2 getTransformedUv(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp) // Converts an output pixel to the UV of its centre relative to viewport vp.
+{
+ FfxFloat32x2 fUv = (FfxFloat32x2(iPxPos - vp.offset) + 0.5f) / vp.size;
+ // For pixels inside vp the result lies strictly between 0 and 1.
+ return fUv;
+}
+
+FfxFloat32x4 getMotionVectorColor(FfxFloat32x2 fMotionVector) // Debug colour for a motion vector: rg = 0.5 + vector * DisplaySize() * 0.1 (so zero motion is mid-grey), b = 0.5, a = 1.
+{
+ return FfxFloat32x4(0.5f + fMotionVector * DisplaySize() * 0.1f, 0.5f, 1.0f);
+}
+
+FfxFloat32x4 getUnusedIndicationColor(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp) // Red on the viewport's diagonal, black elsewhere. Not called anywhere in this header.
+{
+ FfxInt32x2 basePos = iPxPos - vp.offset;
+ // ar is the viewport's width / height ratio.
+ FfxFloat32 ar = FfxFloat32(vp.size.x) / FfxFloat32(vp.size.y);
+ // The red channel is 1 only where x equals y scaled by the aspect ratio (truncated to an integer).
+ return FfxFloat32x4(basePos.x == FfxInt32(basePos.y * ar), 0, 0, 1);
+}
+
+void drawGameMotionVectorFieldVectors(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp) // Debug tile: inpainted game motion vector, colour-coded by getMotionVectorColor.
+{
+ FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+ VectorFieldEntry gameMv;
+ LoadInpaintedGameFieldMv(fUv, gameMv);
+
+ StoreFrameinterpolationOutput(iPxPos, getMotionVectorColor(gameMv.fMotionVector));
+}
+
+void drawGameMotionVectorFieldDepthPriority(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp) // Debug tile: high priority factor of the inpainted game vector field, shown in the green channel.
+{
+ FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+ VectorFieldEntry gameMv;
+ LoadInpaintedGameFieldMv(fUv, gameMv);
+
+ StoreFrameinterpolationOutput(iPxPos, FfxFloat32x4(0, gameMv.uHighPriorityFactor, 0, 1));
+}
+
+void drawOpticalFlowMotionVectorField(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp) // Debug tile: optical-flow motion vector, colour-coded by getMotionVectorColor.
+{
+ FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+ VectorFieldEntry ofMv;
+ SampleOpticalFlowMotionVectorField(fUv, ofMv);
+
+ StoreFrameinterpolationOutput(iPxPos, getMotionVectorColor(ofMv.fMotionVector));
+}
+
+void drawDisocclusionMask(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp) // Debug tile: the two disocclusion mask channels, shown as red and green.
+{
+ FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+ // Scale the UV by RenderSize / GetMaxRenderSize; presumably the mask texture is allocated at the maximum render size - confirm.
+ FfxFloat32x2 fLrUv = fUv * (FfxFloat32x2(RenderSize()) / GetMaxRenderSize());
+
+ FfxFloat32x2 fDisocclusionFactor = ffxSaturate(SampleDisocclusionMask(fLrUv).xy);
+
+ StoreFrameinterpolationOutput(iPxPos, FfxFloat32x4(fDisocclusionFactor, 0, 1));
+}
+
+FfxFloat32x4 getDistortionField(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp) // Debug colour: absolute distortion-field value, amplified 10x, in red/green.
+{
+ FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+ FfxFloat32x2 fDistortionFieldUv = abs(SampleDistortionField(fUv).xy);
+
+ return FfxFloat32x4(fDistortionFieldUv * 10.0f, 0.0f, 1.0f);
+}
+
+void drawPresentBackbuffer(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp) // Debug tile: present backbuffer when GetHUDLessAttachedFactor() == 1, otherwise the distortion-field visualisation.
+{
+ FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+ // Default content of the tile is the distortion field.
+ FfxFloat32x4 fPresentColor = getDistortionField(iPxPos, vp);
+ // It is replaced by the present backbuffer only when the HUD-less factor equals 1.
+ if (GetHUDLessAttachedFactor() == 1)
+ {
+ fPresentColor = SamplePresentBackbuffer(fUv);
+ }
+
+ StoreFrameinterpolationOutput(iPxPos, fPresentColor);
+}
+
+void drawCurrentInterpolationSource(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp) // Debug tile: current backbuffer colour with alpha forced to 1.
+{
+ FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+ FfxFloat32x4 fCurrentBackBuffer = FfxFloat32x4(SampleCurrentBackbuffer(fUv), 1.0f);
+
+ StoreFrameinterpolationOutput(iPxPos, fCurrentBackBuffer);
+}
+
+FfxBoolean pointIsInsideViewport(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp) // True when iPxPos lies in the half-open rectangle [vp.offset, vp.offset + vp.size).
+{
+ FfxInt32x2 extent = vp.offset + vp.size;
+ // The lower bound is inclusive, the upper bound (extent) is exclusive.
+ return (iPxPos.x >= vp.offset.x && iPxPos.x < extent.x) && (iPxPos.y >= vp.offset.y && iPxPos.y < extent.y);
+}
+
+void computeDebugView(FfxInt32x2 iPxPos) // Pass entry point: splits the display into a 3x3 grid of tiles and draws the tile (if any) that contains iPxPos.
+{
+#define VIEWPORT_GRID_SIZE_X 3
+#define VIEWPORT_GRID_SIZE_Y 3
+ // Every tile covers 1/3 x 1/3 of DisplaySize(), truncated to whole pixels.
+ FfxFloat32x2 fViewportScale = FfxFloat32x2(1.0f / VIEWPORT_GRID_SIZE_X, 1.0f / VIEWPORT_GRID_SIZE_Y);
+ FfxInt32x2 iViewportSize = FfxInt32x2(DisplaySize() * fViewportScale);
+
+ // compute grid [y][x] for easier placement of viewports
+ FfxFrameInterpolationDebugViewport vp[VIEWPORT_GRID_SIZE_Y][VIEWPORT_GRID_SIZE_X];
+ for (FfxInt32 y = 0; y < VIEWPORT_GRID_SIZE_Y; y++)
+ {
+ for (FfxInt32 x = 0; x < VIEWPORT_GRID_SIZE_X; x++)
+ {
+ vp[y][x].offset = iViewportSize * FfxInt32x2(x, y);
+ vp[y][x].size = iViewportSize;
+ }
+ }
+
+ // top row
+ DRAW_VIEWPORT(drawGameMotionVectorFieldVectors, iPxPos, vp[0][0]);
+ DRAW_VIEWPORT(drawGameMotionVectorFieldDepthPriority, iPxPos, vp[0][1]);
+ DRAW_VIEWPORT(drawOpticalFlowMotionVectorField, iPxPos, vp[0][2]);
+ // The middle row (vp[1][x]) is not drawn, so those pixels are left untouched by this function.
+ // bottom row
+ DRAW_VIEWPORT(drawDisocclusionMask, iPxPos, vp[2][0]);
+ DRAW_VIEWPORT(drawCurrentInterpolationSource, iPxPos, vp[2][1]);
+ DRAW_VIEWPORT(drawPresentBackbuffer, iPxPos, vp[2][2]);
+}
+
+#endif // FFX_FRAMEINTERPOLATION_DEBUG_VIEW_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_disocclusion_mask.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_disocclusion_mask.h
new file mode 100644
index 000000000000..03adc96f3a88
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_disocclusion_mask.h
@@ -0,0 +1,146 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_DISOCCLUSION_MASK_H
+#define FFX_FRAMEINTERPOLATION_DISOCCLUSION_MASK_H
+
+FFX_STATIC const FfxFloat32 DepthClipBaseScale = 1.0f; // Upper end of the ffxLerp in ComputeSampleDepthClip; at 1.0 that lerp always evaluates to 1.
+
+FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousDepth, FfxFloat32 fPreviousDepthBilinearWeight, FfxFloat32 fCurrentDepthViewSpace) // Weighted depth-clip factor for one sample; iPxSamplePos is unused and the function is not referenced in this header.
+{
+ FfxFloat32 fPrevNearestDepthViewSpace = ConvertFromDeviceDepthToViewSpace(fPreviousDepth);
+
+ // Depth separation logic ref: See "Minimum Triangle Separation for Correct Z-Buffer Occlusion"
+ // Intention: worst case of formula in Figure4 combined with Ksep factor in Section 4
+ const FfxFloat32 fHalfViewportWidth = RenderSize().x * 0.5f;
+ FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
+
+ // WARNING: Ksep only works with reversed-z with infinite projection.
+ const FfxFloat32 Ksep = 1.37e-05f;
+ FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth;
+ FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
+ // Factor is 1 when the current depth is not behind the previous one, else required / actual separation clamped to [0, 1].
+ FfxFloat32 fDepthClipFactor = (fDepthDiff > 0) ? ffxSaturate(fRequiredDepthSeparation / fDepthDiff) : 1.0f;
+ // The trailing ffxLerp evaluates to 1 while DepthClipBaseScale == 1.0f.
+ return fPreviousDepthBilinearWeight * fDepthClipFactor * ffxLerp(1.0f, DepthClipBaseScale, ffxSaturate(fDepthDiff * fDepthDiff));
+}
+
+FfxFloat32 LoadEstimatedDepth(FfxUInt32 estimatedIndex, FfxInt32x2 iSamplePos) // Loads depth at iSamplePos shifted by the distortion field: index 0 = reconstructed previous-frame depth, 1 = dilated depth, anything else = 0.
+{
+ const FfxFloat32x2 fUv = FfxFloat32x2(iSamplePos + 0.5f) / RenderSize();
+ const FfxFloat32x2 fDistortionFieldUv = SampleDistortionField(fUv);
+ FfxInt32x2 iDistortionPixelOffset = FfxInt32x2(fDistortionFieldUv.xy * RenderSize()); // distortion converted from UV units to whole render pixels
+
+ if (estimatedIndex == 0)
+ {
+ return LoadReconstructedDepthPreviousFrame(iSamplePos + iDistortionPixelOffset);
+ }
+ else if (estimatedIndex == 1)
+ {
+ return LoadDilatedDepth(iSamplePos + iDistortionPixelOffset);
+ }
+
+ return 0;
+}
+
+FfxFloat32 ComputeDepthClip(FfxUInt32 estimatedIndex, FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample) // Returns, in [0, 1], the bilinear-weighted share of tested taps whose depth gap exceeds the required separation; 0 when no tap was tested.
+{
+ FfxFloat32 fCurrentDepthViewSpace = ConvertFromDeviceDepthToViewSpace(fCurrentDepthSample);
+ BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());
+
+ // fDepth sums the weight of taps that pass the separation test; fWeightSum sums the weight of all tested taps.
+ FfxFloat32 fDepth = 0.0f;
+ FfxFloat32 fWeightSum = 0.0f;
+ for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
+
+ const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+ const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+ if (IsOnScreen(iSamplePos, RenderSize())) {
+ const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+ if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
+
+ const FfxFloat32 fPrevDepthSample = LoadEstimatedDepth(estimatedIndex, iSamplePos);
+ const FfxFloat32 fPrevNearestDepthViewSpace = ConvertFromDeviceDepthToViewSpace(fPrevDepthSample);
+
+ const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
+ // Only taps where the current view-space depth is larger than the estimated one are tested.
+ if (fDepthDiff > 0.0f) {
+
+#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH
+ const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample);
+#else
+ const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample);
+#endif
+
+ const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
+ const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth);
+ // NOTE: despite its name, fHalfViewportWidth holds the length of the full render-size diagonal.
+ const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize()));
+ const FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
+
+ const FfxFloat32 Ksep = 1.37e-05f;
+ const FfxFloat32 Kfov = length(fCorner) / length(fCenter);
+ const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold;
+
+ // (The unused fResolutionFactor / fPower locals were removed; they never fed into the result.)
+ // A tap passes when its depth gap does not exceed the required separation.
+
+ fDepth += FfxFloat32((fRequiredDepthSeparation / fDepthDiff) >= 1.0f) * fWeight;
+ fWeightSum += fWeight;
+ }
+ }
+ }
+ }
+
+ return (fWeightSum > 0.0f) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f;
+}
+
+void computeDisocclusionMask(FfxInt32x2 iPxPos) // Pass entry point: writes a binary two-channel mask for iPxPos (x: towards the previous frame, y: towards the current frame).
+{
+ FfxFloat32 fDilatedDepth = LoadEstimatedInterpolationFrameDepth(iPxPos);
+
+ FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize();
+ // (An unused view-space conversion of fDilatedDepth was removed; ComputeDepthClip does its own conversion.)
+
+ VectorFieldEntry gameMv;
+ LoadInpaintedGameFieldMv(fDepthUv, gameMv);
+ // Index 0 compares against the reconstructed previous-frame depth, index 1 against the dilated depth (see LoadEstimatedDepth).
+ const FfxFloat32 fDepthClipInterpolatedToPrevious = 1.0f - ComputeDepthClip(0, fDepthUv + gameMv.fMotionVector, fDilatedDepth);
+ const FfxFloat32 fDepthClipInterpolatedToCurrent = 1.0f - ComputeDepthClip(1, fDepthUv - gameMv.fMotionVector, fDilatedDepth);
+ FfxFloat32x2 fDisocclusionMask = FfxFloat32x2(fDepthClipInterpolatedToPrevious, fDepthClipInterpolatedToCurrent);
+ // Binarise: any value of at least FFX_FRAMEINTERPOLATION_EPSILON becomes 1, everything below becomes 0.
+ fDisocclusionMask = FfxFloat32x2(FFX_GREATER_THAN_EQUAL(fDisocclusionMask, ffxBroadcast2(FFX_FRAMEINTERPOLATION_EPSILON)));
+
+ // Avoid false disocclusion if the primary game vector points outside the screen area
+ const FfxFloat32x2 fSrcMotionVector = gameMv.fMotionVector * 2.0f;
+ const FfxInt32x2 iSamplePosPrevious = FfxInt32x2((fDepthUv + fSrcMotionVector) * RenderSize());
+ fDisocclusionMask.x = ffxSaturate(fDisocclusionMask.x + FfxFloat32(!IsOnScreen(iSamplePosPrevious, RenderSize())));
+
+ const FfxInt32x2 iSamplePosCurrent = FfxInt32x2((fDepthUv - fSrcMotionVector) * RenderSize());
+ fDisocclusionMask.y = ffxSaturate(fDisocclusionMask.y + FfxFloat32(!IsOnScreen(iSamplePosCurrent, RenderSize())));
+
+ StoreDisocclusionMask(iPxPos, fDisocclusionMask);
+
+}
+
+#endif // FFX_FRAMEINTERPOLATION_DISOCCLUSION_MASK_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field.h
new file mode 100644
index 000000000000..574e3dff2670
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field.h
@@ -0,0 +1,123 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_GAME_MOTION_VECTOR_FIELD_H
+#define FFX_FRAMEINTERPOLATION_GAME_MOTION_VECTOR_FIELD_H
+
+FfxUInt32 getPriorityFactorFromViewSpaceDepth(FfxFloat32 fViewSpaceDepthInMeters) // Maps depth to a priority in [1, PRIORITY_HIGH_MAX]; smaller depths yield larger priorities.
+{
+ fViewSpaceDepthInMeters = ffxPow(fViewSpaceDepthInMeters, 0.33f); // compress the depth range with a 0.33 exponent
+ // With d = compressed depth: 1 - d / (1 + d) == 1 / (1 + d), scaled by PRIORITY_HIGH_MAX and truncated to an integer.
+ FfxUInt32 uPriorityFactor = FfxUInt32(FfxFloat32(1 - (fViewSpaceDepthInMeters * (1.0f / (1.0f + fViewSpaceDepthInMeters)))) * PRIORITY_HIGH_MAX);
+ // Never return 0: the result is clamped to at least 1.
+ return ffxMax(1, uPriorityFactor);
+}
+
+void computeGameFieldMvs(FfxInt32x2 iPxPos) // Pass entry point: scatters the half motion vector of render pixel iPxPos into the game motion vector field as a primary entry, then writes secondary entries along the vector.
+{
+ const FfxFloat32x2 fUvInScreenSpace = (FfxFloat32x2(iPxPos) + 0.5f) / RenderSize();
+ // Depth and motion vector are fetched at the pixel shifted by the distortion field (converted to whole render pixels).
+ const FfxFloat32x2 fDistortionFieldUv = SampleDistortionField(fUvInScreenSpace);
+ FfxInt32x2 iDistortionPixelOffset = FfxInt32x2(fDistortionFieldUv.xy * RenderSize());
+ // Remap the screen UV into the interpolation rectangle, expressed as a fraction of DisplaySize().
+ const FfxFloat32x2 fUvInInterpolationRectStart = FfxFloat32x2(InterpolationRectBase()) / DisplaySize();
+ const FfxFloat32x2 fUvLetterBoxScale = FfxFloat32x2(InterpolationRectSize()) / DisplaySize();
+ const FfxFloat32x2 fUvInInterpolationRect = fUvInInterpolationRectStart + fUvInScreenSpace * fUvLetterBoxScale;
+
+ const FfxFloat32 fDepthSample = LoadDilatedDepth(iPxPos + iDistortionPixelOffset);
+ const FfxFloat32x2 fGameMotionVector = LoadDilatedMotionVector(iPxPos + iDistortionPixelOffset);
+ const FfxFloat32x2 fMotionVectorHalf = fGameMotionVector * 0.5f;
+ const FfxFloat32x2 fInterpolatedLocationUv = fUvInScreenSpace + fMotionVectorHalf;
+ // High priority is derived from view-space depth (see getPriorityFactorFromViewSpaceDepth).
+ const FfxFloat32 fViewSpaceDepth = ConvertFromDeviceDepthToViewSpace(fDepthSample);
+ const FfxUInt32 uHighPriorityFactorPrimary = getPriorityFactorFromViewSpaceDepth(fViewSpaceDepth);
+
+ // pixel position in current frame + Game Motion Vector -> pixel position in previous frame
+ FfxFloat32x3 prevBackbufferCol = SamplePreviousBackbuffer(fUvInInterpolationRect+ fGameMotionVector * fUvLetterBoxScale).xyz; //returns color of current frame's pixel in previous frame buffer
+ FfxFloat32x3 curBackbufferCol = SampleCurrentBackbuffer(fUvInInterpolationRect).xyz; // returns color of current frame's pixel in current frame buffer
+ FfxFloat32 prevLuma = 0.001f + RawRGBToLuminance(prevBackbufferCol);
+ FfxFloat32 currLuma = 0.001f + RawRGBToLuminance(curBackbufferCol);
+ // Low priority = luminance similarity of the two colours (the exponent 1.0f / 1.0f is 1), zeroed when the reprojected UV fails IsUvInside.
+ FfxUInt32 uLowPriorityFactor = FfxUInt32(ffxRound(ffxPow(MinDividedByMax(prevLuma, currLuma), 1.0f / 1.0f) * PRIORITY_LOW_MAX))
+ * FfxUInt32(IsUvInside(fUvInInterpolationRect + fGameMotionVector * fUvLetterBoxScale));
+
+ // Update primary motion vectors
+ {
+ const FfxUInt32x2 packedVectorPrimary = PackVectorFieldEntries(true, uHighPriorityFactorPrimary, uLowPriorityFactor, fMotionVectorHalf);
+ // The primary entry is written to all four bilinear taps around the interpolated location that are on screen.
+ BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fInterpolatedLocationUv, RenderSize());
+ for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++)
+ {
+ const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+ const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+ if (IsOnScreen(iSamplePos, RenderSize()))
+ {
+ UpdateGameMotionVectorField(iSamplePos, packedVectorPrimary);
+ }
+ }
+ }
+
+ // Update secondary vectors
+ // Main purpose of secondary vectors is to improve quality of inpainted vectors
+ const FfxBoolean bWriteSecondaryVectors = length(fMotionVectorHalf * RenderSize()) > FFX_FRAMEINTERPOLATION_EPSILON;
+ if (bWriteSecondaryVectors)
+ {
+ FfxBoolean bWriteSecondary = true;
+ FfxUInt32 uNumPrimaryHits = 0;
+ const FfxFloat32 fSecondaryStepScale = length(1.0f / RenderSize());
+ const FfxFloat32x2 fStepMv = normalize(fGameMotionVector); // non-zero here: guarded by bWriteSecondaryVectors
+ const FfxFloat32 fBreakDist = ffxMin(length(fMotionVectorHalf), length(FfxFloat32x2(0.5f, 0.5f)));
+ // Walk backwards from the interpolated location along the motion direction, one step at a time, up to fBreakDist.
+ for (FfxFloat32 fMvScale = fSecondaryStepScale; fMvScale <= fBreakDist && bWriteSecondary; fMvScale += fSecondaryStepScale)
+ {
+ const FfxFloat32x2 fSecondaryLocationUv = fInterpolatedLocationUv - fStepMv * fMvScale;
+ BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fSecondaryLocationUv, RenderSize());
+
+ // Reverse depth prio for secondary vectors
+ FfxUInt32 uHighPriorityFactorSecondary = ffxMax(1, PRIORITY_HIGH_MAX - uHighPriorityFactorPrimary);
+ // Low priority of a secondary entry is larger the more the motion direction points towards the screen centre (0.5, 0.5).
+ const FfxFloat32x2 fToCenter = normalize(FfxFloat32x2(0.5f, 0.5f) - fSecondaryLocationUv);
+ uLowPriorityFactor = FfxUInt32(ffxMax(0.0f, dot(fToCenter, fStepMv)) * PRIORITY_LOW_MAX);
+ const FfxUInt32x2 packedVectorSecondary = PackVectorFieldEntries(false, uHighPriorityFactorSecondary, uLowPriorityFactor, fMotionVectorHalf);
+
+ // Only write secondary mvs to single bilinear location
+ for (FfxInt32 iSampleIndex = 0; iSampleIndex < 1; iSampleIndex++)
+ {
+ const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+ const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+ // Leaving the screen ends the walk (bWriteSecondary is part of the outer loop condition).
+ bWriteSecondary = bWriteSecondary && IsOnScreen(iSamplePos, RenderSize());
+
+ if (bWriteSecondary)
+ {
+ const FfxUInt32 uExistingVectorFieldEntry = UpdateGameMotionVectorFieldEx(iSamplePos, packedVectorSecondary);
+ // Count entries flagged as primary in the value returned by the update; the walk stops once more than 3 were hit.
+ uNumPrimaryHits += FfxUInt32(PackedVectorFieldEntryIsPrimary(uExistingVectorFieldEntry));
+ bWriteSecondary = bWriteSecondary && (uNumPrimaryHits <= 3);
+ }
+ }
+ }
+ }
+}
+
+#endif // FFX_FRAMEINTERPOLATION_GAME_MOTION_VECTOR_FIELD_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_inpainting.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_inpainting.h
new file mode 100644
index 000000000000..7ad3630e0eab
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_inpainting.h
@@ -0,0 +1,150 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_INPAINTING_H
+#define FFX_FRAMEINTERPOLATION_INPAINTING_H
+
+FfxFloat32x4 ComputeInpaintingLevel(FfxFloat32x2 fUv, const FfxInt32 iMipLevel, const FfxInt32x2 iTexSize) // Bilinear gather from one pyramid level: rgb = weighted colour sum, w = weight sum (not normalised).
+{
+ BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUv, iTexSize);
+
+ FfxFloat32x4 fColor = FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+
+ for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
+
+ const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+ const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+ // Taps outside the level's extent (per IsOnScreen) are skipped.
+ if (IsOnScreen(iSamplePos, iTexSize)) {
+
+ FfxFloat32x4 fSample = LoadInpaintingPyramid(iMipLevel, iSamplePos);
+ // Samples with w <= 0 get zero weight; the others use their plain bilinear weight.
+ const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex] * FfxFloat32(fSample.w > 0.0f);
+
+ fColor += FfxFloat32x4(fSample.rgb * fWeight, fWeight);
+ }
+ }
+
+ return fColor;
+}
+
+FfxFloat32x3 ComputeInpainting(FfxInt32x2 iPxPos) // Blends up to 10 pyramid levels into an inpainting colour for display pixel iPxPos; returns black when no level holds a valid sample.
+{
+ FfxFloat32x2 fUv = (iPxPos + 0.5f) / (DisplaySize());
+
+ FfxFloat32x4 fColor = FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+ // fColor.rgb accumulates weighted level colours, fColor.w the matching weights. (An unused fWeightSum local was removed.)
+ FfxInt32x2 iTexSize = DisplaySize();
+
+ for (FfxInt32 iMipLevel = 0; iMipLevel < 10; iMipLevel++) {
+ // The size is halved before sampling, so level 0 is addressed at half of DisplaySize().
+ iTexSize /= 2;
+
+ FfxFloat32x4 fMipColor = ComputeInpaintingLevel(fUv, iMipLevel, iTexSize);
+ // Levels without any valid tap contribute nothing.
+ if (fMipColor.w > 0.0f) {
+ const FfxFloat32x3 fNormalizedMipColor = fMipColor.rgb / fMipColor.w;
+ const FfxFloat32 fMipWeight = ffxPow(1.0f - iMipLevel / 10.0f, 3.0f) * fMipColor.w;
+ // Finer levels dominate: the level weight falls off as (1 - level / 10)^3.
+ fColor += FfxFloat32x4(fNormalizedMipColor, 1.0f) * fMipWeight;
+ }
+ }
+ // Guard the normalisation: with no contributing level fColor.w is 0 and the division would produce NaN.
+ return (fColor.w > 0.0f) ? (fColor.rgb / fColor.w) : FfxFloat32x3(0.0f, 0.0f, 0.0f);
+}
+
+void drawDebugTearLines(FfxInt32x2 iPxPos, inout FfxFloat32x3 fColor, inout FfxBoolean bWriteColor) // Debug overlay: vertical bars at the left and right display edges; sets bWriteColor when it changes fColor.
+{
+ if (iPxPos.x < 16) // left bar (columns 0..15): green channel forced to 1
+ {
+ fColor.g = 1.f;
+ bWriteColor = true;
+ }
+ else if (iPxPos.x > DisplaySize().x - 16) // right bar: GetDebugBarColor() is added on top of the existing colour
+ {
+ fColor += GetDebugBarColor();
+ bWriteColor = true;
+ }
+
+}
+
+void drawDebugResetIndicators(FfxInt32x2 iPxPos, inout FfxFloat32x3 fColor, inout FfxBoolean bWriteColor) // Debug overlay: horizontal bands near y = 0 that flag Reset() and HasSceneChanged().
+{
+ if (iPxPos.y < 32 && Reset()) // rows 0..31: red channel forced to 1 while Reset() is true
+ {
+ fColor.r = 1.f;
+ bWriteColor = true;
+ }
+ else if (iPxPos.y > 32 && iPxPos.y < 64 && HasSceneChanged()) // rows 33..63: blue channel forced to 1; row 32 belongs to neither band
+ {
+ fColor.b = 1.f;
+ bWriteColor = true;
+ }
+}
+
+void computeInpainting(FfxInt32x2 iPxPos) // Pass entry point: fills in the interpolated output at iPxPos and only writes it back when something changed.
+{
+ FfxBoolean bWriteColor = false;
+ FfxFloat32x4 fInterpolatedColor = RWLoadFrameinterpolationOutput(iPxPos);
+ // The alpha channel of the interpolated output is used as the inpainting blend weight.
+ const FfxFloat32 fInPaintingWeight = fInterpolatedColor.w;
+ if (fInPaintingWeight > FFX_FRAMEINTERPOLATION_EPSILON)
+ {
+ fInterpolatedColor.rgb = ffxLerp(fInterpolatedColor.rgb, ComputeInpainting(iPxPos) * FfxFloat32(DisplaySize().x > 0), fInPaintingWeight);
+ bWriteColor = true;
+ }
+ // When GetHUDLessAttachedFactor() == 1, pixels where the present backbuffer differs from the interpolation source are blended towards the present colour.
+ if (GetHUDLessAttachedFactor() == 1)
+ {
+ const FfxFloat32x3 fCurrentInterpolationSource = LoadCurrentBackbuffer(iPxPos).rgb;
+ const FfxFloat32x3 fPresentColor = LoadPresentBackbuffer(iPxPos).rgb;
+ // Only pixels with any per-channel difference are considered.
+ if (any(FFX_GREATER_THAN(abs(fCurrentInterpolationSource - fPresentColor), FfxFloat32x3(0.0, 0.0, 0.0))))
+ {
+ const FfxFloat32 fStaticFactor = CalculateStaticContentFactor(RawRGBToLinear(fCurrentInterpolationSource), RawRGBToLinear(fPresentColor));
+ // fStaticFactor is the blend amount towards the present colour.
+ if (fStaticFactor > FFX_FRAMEINTERPOLATION_EPSILON)
+ {
+ fInterpolatedColor.rgb = ffxLerp(fInterpolatedColor.rgb, fPresentColor, fStaticFactor);
+ bWriteColor = true;
+ }
+ }
+ }
+ // Optional debug overlays, selected by the dispatch flags.
+ if ((GetDispatchFlags() & FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES) != 0)
+ {
+ drawDebugTearLines(iPxPos, fInterpolatedColor.rgb, bWriteColor);
+ }
+
+ if ((GetDispatchFlags() & FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_RESET_INDICATORS) != 0)
+ {
+ drawDebugResetIndicators(iPxPos, fInterpolatedColor.rgb, bWriteColor);
+ }
+ // Untouched pixels are not rewritten; written pixels get alpha 1.
+ if (bWriteColor)
+ {
+ StoreFrameinterpolationOutput(iPxPos, FfxFloat32x4(fInterpolatedColor.rgb, 1.0f));
+ }
+
+}
+
+#endif // FFX_FRAMEINTERPOLATION_INPAINTING_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field.h
new file mode 100644
index 000000000000..8432132604d1
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field.h
@@ -0,0 +1,123 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_OPTICAL_FLOW_VECTOR_FIELD_H
+#define FFX_FRAMEINTERPOLATION_OPTICAL_FLOW_VECTOR_FIELD_H
+
+void computeOpticalFlowFieldMvs(FfxUInt32x2 dtID, FfxFloat32x2 fOpticalFlowVector) // Scatters one optical-flow vector into the optical-flow motion vector field.
+{
+ FfxFloat32x2 fUv = FfxFloat32x2(FfxFloat32x2(dtID)+0.5f) / GetOpticalFlowSize2(); // Texel center of dtID, normalized by GetOpticalFlowSize2().
+
+ const FfxFloat32 scaleFactor = 1.0f; // NOTE(review): never read in this function.
+ FfxFloat32x2 fMotionVectorHalf = fOpticalFlowVector * 0.5f;
+
+ // pixel position in current frame + fOpticalFlowVector-> pixel position in previous frame
+ FfxFloat32x3 prevBackbufferCol = SamplePreviousBackbuffer(fUv + fOpticalFlowVector).xyz; // returns previous backbuffer color of current frame pixel position in previous frame
+ FfxFloat32x3 curBackbufferCol = SampleCurrentBackbuffer(fUv).xyz; // returns current backbuffer color at current frame pixel position
+ // The 0.001 bias keeps both luma values strictly positive before they are passed to MinDividedByMax().
+ FfxFloat32 prevLuma = 0.001f + RawRGBToLuminance(prevBackbufferCol);
+ FfxFloat32 currLuma = 0.001f + RawRGBToLuminance(curBackbufferCol);
+ // High priority grows with the vector length (scaled by InterpolationRectSize()); lengths <= 1 always yield 0 and are skipped.
+ FfxFloat32 fVelocity = length(fOpticalFlowVector * InterpolationRectSize());
+ FfxUInt32 uHighPriorityFactor = FfxUInt32(fVelocity > 1.0f) * FfxUInt32(ffxSaturate(fVelocity / length(InterpolationRectSize() * 0.05f)) * PRIORITY_HIGH_MAX);
+
+ if(uHighPriorityFactor > 0) {
+ FfxUInt32 uLowPriorityFactor = FfxUInt32(ffxRound(ffxPow(MinDividedByMax(prevLuma, currLuma), 1.0f / 1.0f) * PRIORITY_LOW_MAX))
+ * FfxUInt32(IsUvInside(fUv + fOpticalFlowVector)); // Exponent 1.0f / 1.0f is 1; the factor is zeroed when IsUvInside() is false for the displaced UV.
+
+ // Write the packed half-vector entry to the four texels of the bilinear footprint at fUv + fMotionVectorHalf,
+ // skipping footprint texels that are not on screen for the optical-flow size.
+
+ const FfxUInt32x2 packedVectorPrimary = PackVectorFieldEntries(true, uHighPriorityFactor, uLowPriorityFactor, fMotionVectorHalf);
+
+ BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUv + fMotionVectorHalf, GetOpticalFlowSize2());
+ for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++)
+ {
+ const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+ const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+ if (IsOnScreen(iSamplePos, GetOpticalFlowSize2()))
+ {
+ UpdateOpticalflowMotionVectorField(iSamplePos, packedVectorPrimary);
+ }
+ }
+ }
+}
+
+void computeOpticalFlowVectorField(FfxInt32x2 iPxPos) // Filters the 3x3 optical-flow neighbourhood of iPxPos in two passes and scatters the result.
+{
+ FfxFloat32x2 fOpticalFlowVector = FfxFloat32x2(0.0, 0.0);
+ FfxFloat32x2 fOpticalFlowVector3x3Avg = FfxFloat32x2(0.0, 0.0);
+ FfxInt32 size = 1;
+ FfxFloat32 sw = 0.0f;
+ // Pass 1: average of the neighbourhood vectors, weighted by the length factor only.
+ for(FfxInt32 y = -size; y <= size; y++) {
+ for(FfxInt32 x = -size; x <= size; x++) {
+
+ FfxInt32x2 samplePos = iPxPos + FfxInt32x2(x, y);
+
+ FfxFloat32x2 vs = LoadOpticalFlow(samplePos);
+ FfxFloat32 fConfidenceFactor = ffxMax(FFX_FRAMEINTERPOLATION_EPSILON, LoadOpticalFlowConfidence(samplePos)); // NOTE(review): loaded but not used in the weight.
+
+ // The weight is 0 for lengths <= 1 and for lengths >= 512, and (512 - len) in between.
+ FfxFloat32 len = length(vs * InterpolationRectSize());
+ FfxFloat32 len_factor = ffxMax(0.0f, 512.0f - len) * FfxFloat32(len > 1.0f);
+ FfxFloat32 w = len_factor;
+
+ fOpticalFlowVector3x3Avg += vs * w;
+
+ sw += w;
+ }
+ }
+
+ // Every weight can be 0 (e.g. static content); normalizing then would be 0/0, so keep the zero average instead.
+ if (sw > 0.0f) { fOpticalFlowVector3x3Avg /= sw; }
+ // Pass 2: re-weight each vector by how well it aligns with the pass-1 average (dot product raised to 1.25).
+ sw = 0.0f;
+ for(FfxInt32 y = -size; y <= size; y++) {
+ for(FfxInt32 x = -size; x <= size; x++) {
+
+ FfxInt32x2 samplePos = iPxPos + FfxInt32x2(x, y);
+
+ FfxFloat32x2 vs = LoadOpticalFlow(samplePos);
+
+ FfxFloat32 fConfidenceFactor = ffxMax(FFX_FRAMEINTERPOLATION_EPSILON, LoadOpticalFlowConfidence(samplePos)); // NOTE(review): loaded but not used in the weight.
+ FfxFloat32 len = length(vs * InterpolationRectSize());
+ FfxFloat32 len_factor = ffxMax(0.0f, 512.0f - len) * FfxFloat32(len > 1.0f);
+
+ // Clamp the dot product before the pow: a negative base is undefined (GLSL) or NaN (HLSL). Opposing vectors get weight 0.
+ FfxFloat32 w = ffxPow(ffxMax(0.0f, dot(fOpticalFlowVector3x3Avg, vs)), 1.25f) * len_factor;
+
+ fOpticalFlowVector += vs * w;
+ sw += w;
+ }
+ }
+
+ if (sw > FFX_FRAMEINTERPOLATION_EPSILON)
+ {
+ fOpticalFlowVector /= sw;
+ }
+
+ computeOpticalFlowFieldMvs(iPxPos, fOpticalFlowVector);
+}
+
+#endif // FFX_FRAMEINTERPOLATION_OPTICAL_FLOW_VECTOR_FIELD_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_dilated_velocity_and_previous_depth.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_dilated_velocity_and_previous_depth.h
new file mode 100644
index 000000000000..e92a57900a6b
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_dilated_velocity_and_previous_depth.h
@@ -0,0 +1,123 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
+#define FFX_FRAMEINTERPOLATION_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
+
+void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize) // Scatters fDepth to the texels around the motion-vector-displaced position of iPxPos.
+{
+ fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f); // Zeroes vectors whose length, scaled by DisplaySize(), is <= 0.1.
+
+ FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize;
+ FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
+ // NOTE(review): fUv is normalized by iPxDepthSize, but the bilinear footprint uses RenderSize(); the two agree only if the caller passes RenderSize().
+ BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize());
+
+ // Project current depth into previous frame locations.
+ // Push to all pixels having some contribution if reprojection is using bilinear logic.
+ for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
+
+ const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+ FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+ // Footprint texels whose bilinear weight does not exceed the threshold are skipped.
+ if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
+
+ FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset;
+ if (IsOnScreen(iStorePos, iPxDepthSize)) {
+ UpdateReconstructedDepthPreviousFrame(iStorePos, fDepth);
+ }
+ }
+ }
+}
+
+void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord) // Returns the nearest depth in the 3x3 neighbourhood of iPxPos and the coordinate it was found at.
+{
+ const FfxInt32 iSampleCount = 9;
+ const FfxInt32x2 iSampleOffsets[iSampleCount] = {
+ FfxInt32x2(+0, +0),
+ FfxInt32x2(+1, +0),
+ FfxInt32x2(+0, +1),
+ FfxInt32x2(+0, -1),
+ FfxInt32x2(-1, +0),
+ FfxInt32x2(-1, +1),
+ FfxInt32x2(+1, +1),
+ FfxInt32x2(-1, -1),
+ FfxInt32x2(+1, -1),
+ };
+
+ // pull out the depth loads to allow SC to batch them
+ FfxFloat32 depth[9]; // NOTE(review): the literal 9 must stay equal to iSampleCount.
+ FfxInt32 iSampleIndex = 0;
+ FFX_UNROLL
+ for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) {
+ // Depth is loaded for all nine positions without an on-screen test; off-screen results are ignored by the second loop.
+ FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
+ depth[iSampleIndex] = LoadInputDepth(iPos);
+ }
+
+ // find closest depth
+ fNearestDepthCoord = iPxPos;
+ fNearestDepth = depth[0];
+ FFX_UNROLL
+ for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) {
+ // The centre sample (index 0) is the initial candidate; only on-screen neighbours can replace it.
+ FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
+ if (IsOnScreen(iPos, iPxSize)) {
+ // "Nearest" means the larger value with inverted depth and the smaller value otherwise.
+ FfxFloat32 fNdDepth = depth[iSampleIndex];
+#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH
+ if (fNdDepth > fNearestDepth) {
+#else
+ if (fNdDepth < fNearestDepth) {
+#endif
+ fNearestDepthCoord = iPos;
+ fNearestDepth = fNdDepth;
+ }
+ }
+ }
+}
+
+void ReconstructAndDilate(FfxInt32x2 iPxLrPos) // Stores dilated depth and motion vectors for iPxLrPos, then reprojects that depth via ReconstructPrevDepth().
+{
+ FfxFloat32 fDilatedDepth;
+ FfxInt32x2 iNearestDepthCoord;
+ // Nearest depth and its coordinate, as reported by FindNearestDepth() for the render-size neighbourhood.
+ FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord);
+ // The motion vector is fetched at the nearest-depth texel; without low-res motion vectors the position is first mapped by ComputeHrPosFromLrPos().
+#if FFX_FRAMEINTERPOLATION_OPTION_LOW_RES_MOTION_VECTORS
+ FfxInt32x2 iSamplePos = iPxLrPos;
+ FfxInt32x2 iMotionVectorPos = iNearestDepthCoord;
+#else
+ FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxLrPos);
+ FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord);
+#endif // NOTE(review): iSamplePos is assigned in both branches but never read in this function.
+
+ FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos);
+
+ StoreDilatedDepth(iPxLrPos, fDilatedDepth);
+ StoreDilatedMotionVectors(iPxLrPos, fDilatedMotionVector);
+
+ ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize());
+}
+
+
+#endif //!defined( FFX_FRAMEINTERPOLATION_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H )
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth.h
new file mode 100644
index 000000000000..d85f67312c59
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth.h
@@ -0,0 +1,63 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_RECONSTRUCT_PREVIOUS_DEPTH_H
+#define FFX_FRAMEINTERPOLATION_RECONSTRUCT_PREVIOUS_DEPTH_H
+
+void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxUInt32 depthTarget, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize) // Scatters fDepth into the interpolated-frame reconstructed depth around the displaced position of iPxPos.
+{
+ const FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize; // NOTE(review): normalized by iPxDepthSize, while the footprint and on-screen test below use RenderSize().
+
+ // Project current depth into previous frame locations.
+ // Push to all pixels having some contribution if reprojection is using bilinear logic.
+ BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUv + fMotionVector, RenderSize());
+ for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++)
+ {
+ const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+ const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+ const FfxFloat32 fSampleWeight = bilinearInfo.fWeights[iSampleIndex];
+ // A texel is written only if its weight exceeds the threshold, it is on screen, and depthTarget is non-zero.
+ if (fSampleWeight > fReconstructedDepthBilinearWeightThreshold)
+ {
+ if (IsOnScreen(iSamplePos, RenderSize()))
+ {
+ if (depthTarget != 0) {
+ UpdateReconstructedDepthInterpolatedFrame(iSamplePos, fDepth);
+ }
+ }
+ }
+ }
+}
+
+void reconstructPreviousDepth(FfxInt32x2 iPxPos) // Reprojects the dilated depth for iPxPos by half of its dilated motion vector via ReconstructPrevDepth().
+{
+ const FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5f)) / RenderSize();
+ const FfxFloat32x2 fDistortionFieldUv = SampleDistortionField(fUv);
+ FfxInt32x2 iDistortionPixelOffset = FfxInt32x2(fDistortionFieldUv.xy * RenderSize()); // Distortion offset scaled by RenderSize() and converted to integer texels.
+ // Depth and motion vector are read at iPxPos displaced by the distortion offset; the scatter itself starts from iPxPos.
+ FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos + iDistortionPixelOffset);
+ FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos + iDistortionPixelOffset);
+ // depthTarget is passed as the constant 1, so the store inside ReconstructPrevDepth() is always enabled.
+ ReconstructPrevDepth(iPxPos, 1, fDilatedDepth, fMotionVector * 0.5f, RenderSize());
+}
+
+#endif // FFX_FRAMEINTERPOLATION_RECONSTRUCT_PREVIOUS_DEPTH_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_resources.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_resources.h
new file mode 100644
index 000000000000..5505f6d15869
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_resources.h
@@ -0,0 +1,95 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_RESOURCES_H
+#define FFX_FRAMEINTERPOLATION_RESOURCES_H
+
+#if defined(FFX_CPU) || defined(FFX_GPU) // Integer identifiers for the frame-interpolation resources and constant buffers.
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL 0
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT 1
+//#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_BACKBUFFER 2
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PREVIOUS_INTERPOLATION_SOURCE 3
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE 4
+// Identifier 2 is not defined: the BACKBUFFER entry above is commented out.
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEPTH 5
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_MOTION_VECTORS 6
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_2 7
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_3 8
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_4 9
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH 10
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 11
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME 12
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME 13
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK 14
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X 15
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y 16
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X 17
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y 18
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR 19
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_5 20
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_CONFIDENCE 21
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_GLOBAL_MOTION 22
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCENE_CHANGE_DETECTION 23
+// NOTE(review): identifier 24 is not assigned anywhere in this list.
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_6 25
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_7 26
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEBUG_OUTPUT_0 27
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEBUG_OUTPUT_1 28
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 29
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_MASK 30
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PRESENT_BACKBUFFER 31
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS 32
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID 33 // same as FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_0
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_0 33
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_1 34
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_2 35
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_3 36
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_4 37
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_5 38
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_6 39
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_7 40
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_8 41
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_9 42
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_10 43
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_11 44
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_12 45
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEFAULT_DISTORTION_FIELD 46
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISTORTION_FIELD 47
+// COUNT is one past the highest identifier defined above (47).
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT 48
+// Constant buffer identifiers (0 and 1) and their count.
+#define FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_IDENTIFIER 0
+#define FFX_FRAMEINTERPOLATION_INPAINTING_PYRAMID_CONSTANTBUFFER_IDENTIFIER 1
+#define FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_COUNT 2
+
+#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
+
+#endif //!defined( FFX_FRAMEINTERPOLATION_RESOURCES_H )
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_setup.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_setup.h
new file mode 100644
index 000000000000..60dd32377621
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_setup.h
@@ -0,0 +1,50 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_SETUP_H
+#define FFX_FRAMEINTERPOLATION_SETUP_H
+
+void setupFrameinterpolationResources(FfxInt32x2 iPxPos) // Clears the per-texel working resources at iPxPos; texel (0, 0) additionally maintains the frame counter.
+{
+ // Update reset counters
+ StoreCounter(COUNTER_SPD, 0); // Executed for every iPxPos, not only for texel (0, 0).
+ if (all(FFX_EQUAL(iPxPos, FfxInt32x2(0, 0))))
+ { // COUNTER_FRAME_INDEX_SINCE_LAST_RESET: zeroed on Reset() or HasSceneChanged(), incremented by one otherwise.
+ if(Reset() || HasSceneChanged()) {
+ StoreCounter(COUNTER_FRAME_INDEX_SINCE_LAST_RESET, 0);
+ } else {
+ FfxUInt32 counter = RWLoadCounter(COUNTER_FRAME_INDEX_SINCE_LAST_RESET);
+ StoreCounter(COUNTER_FRAME_INDEX_SINCE_LAST_RESET, counter + 1);
+ }
+ }
+
+ // Reset resources
+ StoreGameMotionVectorFieldX(iPxPos, 0);
+ StoreGameMotionVectorFieldY(iPxPos, 0);
+ // The optical-flow vector field and the disocclusion mask are zeroed in the same way.
+ StoreOpticalflowMotionVectorFieldX(iPxPos, 0);
+ StoreOpticalflowMotionVectorFieldY(iPxPos, 0);
+
+ StoreDisocclusionMask(iPxPos, FfxFloat32x2(0.0, 0.0));
+}
+
+#endif // FFX_FRAMEINTERPOLATION_SETUP_H
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr1.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1.h
similarity index 97%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr1.h
rename to thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1.h
index 1ac23cf3de3d..82ebf21fed29 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr1.h
+++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -19,9 +20,10 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
-#ifdef __clang__
-#pragma clang diagnostic ignored "-Wunused-variable"
-#endif
+/// @defgroup FfxGPUFsr1 FidelityFX FSR1
+/// FidelityFX Super Resolution 1 GPU documentation
+///
+/// @ingroup FfxGPUEffects
/// Setup required constant values for EASU (works on CPU or GPU).
///
@@ -36,7 +38,7 @@
/// @param [in] outputSizeInPixelsX The display resolution which the input image gets upscaled to in X dimension.
/// @param [in] outputSizeInPixelsY The display resolution which the input image gets upscaled to in Y dimension.
///
-/// @ingroup FSR1
+/// @ingroup FfxGPUFsr1
FFX_STATIC void ffxFsrPopulateEasuConstants(
FFX_PARAMETER_INOUT FfxUInt32x4 con0,
FFX_PARAMETER_INOUT FfxUInt32x4 con1,
@@ -102,7 +104,7 @@ FFX_STATIC void ffxFsrPopulateEasuConstants(
/// @param [in] inputOffsetInPixelsX The input image offset in the X dimension into the resource containing it (useful for dynamic resolution).
/// @param [in] inputOffsetInPixelsY The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution).
///
-/// @ingroup FSR1
+/// @ingroup FfxGPUFsr1
FFX_STATIC void ffxFsrPopulateEasuConstantsOffset(
FFX_PARAMETER_INOUT FfxUInt32x4 con0,
FFX_PARAMETER_INOUT FfxUInt32x4 con1,
@@ -329,7 +331,7 @@ void ffxFsrEasuFloat(
// Normalize with approximation, and cleanup close to zero.
FfxFloat32x2 dir2 = dir * dir;
FfxFloat32 dirR = dir2.x + dir2.y;
- FfxUInt32 zro = dirR < FfxFloat32(1.0 / 32768.0);
+ FfxBoolean zro = dirR < FfxFloat32(1.0 / 32768.0);
dirR = ffxApproximateReciprocalSquareRoot(dirR);
dirR = zro ? FfxFloat32(1.0) : dirR;
dir.x = zro ? FfxFloat32(1.0) : dir.x;
@@ -382,7 +384,7 @@ void ffxFsrEasuFloat(
fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w)); // n
// Normalize and dering.
- pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(rcp(aW))));
+ pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(ffxReciprocal(aW))));
}
#endif // #if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT)
@@ -526,8 +528,8 @@ void FsrEasuH(
dirPY,
lenP,
ppp,
- FfxUInt32(true),
- FfxUInt32(false),
+ FfxBoolean(true),
+ FfxBoolean(false),
FfxFloat16x2(bL, cL),
FfxFloat16x2(eL, fL),
FfxFloat16x2(fL, gL),
@@ -537,8 +539,8 @@ void FsrEasuH(
dirPY,
lenP,
ppp,
- FfxUInt32(false),
- FfxUInt32(true),
+ FfxBoolean(false),
+ FfxBoolean(true),
FfxFloat16x2(fL, gL),
FfxFloat16x2(iL, jL),
FfxFloat16x2(jL, kL),
@@ -549,7 +551,7 @@ void FsrEasuH(
FfxFloat16x2 dir2 = dir * dir;
FfxFloat16 dirR = dir2.x + dir2.y;
- FfxBoolean zro = FfxBoolean(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0));
+ FfxUInt32 zro = FfxUInt32(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0));
dirR = ffxApproximateReciprocalSquareRootHalf(dirR);
dirR = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dirR;
dir.x = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dir.x;
@@ -664,7 +666,7 @@ void FsrEasuH(
sharpness = exp2(-sharpness);
FfxFloat32x2 hSharp = {sharpness, sharpness};
con[0] = ffxAsUInt32(sharpness);
- con[1] = packHalf2x16(hSharp);
+ con[1] = ffxPackHalf2x16(hSharp);
con[2] = 0;
con[3] = 0;
}
@@ -746,12 +748,12 @@ void FsrEasuH(
// Immediate constants for peak range.
FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
// Limiters, these need to be high precision RCPs.
- FfxFloat32 hitMinR = mn4R * rcp(FfxFloat32(4.0) * mx4R);
- FfxFloat32 hitMinG = mn4G * rcp(FfxFloat32(4.0) * mx4G);
- FfxFloat32 hitMinB = mn4B * rcp(FfxFloat32(4.0) * mx4B);
- FfxFloat32 hitMaxR = (peakC.x - mx4R) * rcp(FfxFloat32(4.0) * mn4R + peakC.y);
- FfxFloat32 hitMaxG = (peakC.x - mx4G) * rcp(FfxFloat32(4.0) * mn4G + peakC.y);
- FfxFloat32 hitMaxB = (peakC.x - mx4B) * rcp(FfxFloat32(4.0) * mn4B + peakC.y);
+ FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R);
+ FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G);
+ FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B);
+ FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y);
+ FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y);
+ FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y);
FfxFloat32 lobeR = max(-hitMinR, hitMaxR);
FfxFloat32 lobeG = max(-hitMinG, hitMaxG);
FfxFloat32 lobeB = max(-hitMinB, hitMaxB);
@@ -1050,10 +1052,10 @@ void FsrEasuH(
#if defined(FFX_GPU)
void FsrSrtmF(inout FfxFloat32x3 c)
{
- c *= ffxBroadcast3(rcp(ffxMax3(c.r, c.g, c.b) + FfxFloat32(1.0)));
+ c *= ffxBroadcast3(ffxReciprocal(ffxMax3(c.r, c.g, c.b) + FfxFloat32(1.0)));
}
// The extra max solves the c=1.0 case (which is a /0).
- void FsrSrtmInvF(inout FfxFloat32x3 c){c*=ffxBroadcast3(rcp(max(FfxFloat32(1.0/32768.0),FfxFloat32(1.0)-ffxMax3(c.r,c.g,c.b))));}
+ void FsrSrtmInvF(inout FfxFloat32x3 c){c*=ffxBroadcast3(ffxReciprocal(max(FfxFloat32(1.0/32768.0),FfxFloat32(1.0)-ffxMax3(c.r,c.g,c.b))));}
#endif
//==============================================================================================================================
#if defined(FFX_GPU )&& FFX_HALF == 1
diff --git a/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_glsl.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_glsl.h
new file mode 100644
index 000000000000..1fd8ed7df764
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_glsl.h
@@ -0,0 +1,221 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_fsr1_resources.h"
+
+#if defined(FFX_GPU)
+#include "../ffx_core.h"
+
+#ifndef FFX_PREFER_WAVE64
+#define FFX_PREFER_WAVE64
+#endif // #ifndef FFX_PREFER_WAVE64
+
+#if defined(FSR1_BIND_CB_FSR1) // FSR1 constant buffer: four constant vectors plus one sample vector, std140 layout.
+ layout (set = 0, binding = FSR1_BIND_CB_FSR1, std140) uniform cbFSR1_t
+ {
+ FfxUInt32x4 const0;
+ FfxUInt32x4 const1;
+ FfxUInt32x4 const2;
+ FfxUInt32x4 const3;
+ FfxUInt32x4 sample0;
+ } cbFSR1;
+#endif // defined(FSR1_BIND_CB_FSR1)
+// NOTE(review): the accessors below reference cbFSR1 unconditionally, so they only compile when FSR1_BIND_CB_FSR1 is defined.
+FfxUInt32x4 Const0() // Returns cbFSR1.const0.
+{
+ return cbFSR1.const0;
+}
+
+FfxUInt32x4 Const1() // Returns cbFSR1.const1.
+{
+ return cbFSR1.const1;
+}
+
+FfxUInt32x4 Const2() // Returns cbFSR1.const2.
+{
+ return cbFSR1.const2;
+}
+
+FfxUInt32x4 Const3() // Returns cbFSR1.const3.
+{
+ return cbFSR1.const3;
+}
+
+FfxUInt32x4 EASUSample() // Returns cbFSR1.sample0 (the same member RCasSample() returns).
+{
+ return cbFSR1.sample0;
+}
+
+FfxUInt32x4 RCasSample() // Returns cbFSR1.sample0 (the same member EASUSample() returns).
+{
+ return cbFSR1.sample0;
+}
+
+FfxUInt32x4 RCasConfig() // Returns cbFSR1.const0, i.e. the same member as Const0().
+{
+ return cbFSR1.const0;
+}
+
+// GODOT BEGINS
+// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them.
+layout (set = 0, binding = 100 /*1000*/) uniform sampler s_LinearClamp; // Combined with r_input_color by the GatherEasu* callbacks in this file.
+// GODOT ENDS
+
+// SRVs: each texture is declared only when its FSR1_BIND_SRV_* binding macro is defined.
+#if defined FSR1_BIND_SRV_INPUT_COLOR
+ layout (set = 0, binding = FSR1_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color;
+#endif
+#if defined FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR
+ layout (set = 0, binding = FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR) uniform texture2D r_internal_upscaled_color;
+#endif
+#if defined FSR1_BIND_SRV_UPSCALED_OUTPUT
+ layout (set = 0, binding = FSR1_BIND_SRV_UPSCALED_OUTPUT) uniform texture2D r_upscaled_output;
+#endif
+
+// UAV declarations: storage images, all declared with the rgba32f format qualifier and guarded by FSR1_BIND_UAV_* macros.
+#if defined FSR1_BIND_UAV_INPUT_COLOR
+ layout (set = 0, binding = FSR1_BIND_UAV_INPUT_COLOR, rgba32f) uniform image2D rw_input_color;
+#endif
+#if defined FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR
+ layout (set = 0, binding = FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR, rgba32f) uniform image2D rw_internal_upscaled_color;
+#endif
+#if defined FSR1_BIND_UAV_UPSCALED_OUTPUT
+ layout (set = 0, binding = FSR1_BIND_UAV_UPSCALED_OUTPUT, rgba32f) uniform image2D rw_upscaled_output;
+#endif
+
+#if FFX_HALF
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat16x4 GatherEasuRed(FfxFloat32x2 fPxPos) // Gathers component 0 of r_input_color at fPxPos, converted to FfxFloat16x4.
+ {
+ return FfxFloat16x4(textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 0));
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat16x4 GatherEasuGreen(FfxFloat32x2 fPxPos) // Gathers component 1 of r_input_color at fPxPos, converted to FfxFloat16x4.
+ {
+ return FfxFloat16x4(textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 1));
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat16x4 GatherEasuBlue(FfxFloat32x2 fPxPos) // Gathers component 2 of r_input_color at fPxPos, converted to FfxFloat16x4.
+ {
+ return FfxFloat16x4(textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 2));
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+// StoreEASUOutput writes to rw_internal_upscaled_color when FFX_FSR1_OPTION_APPLY_RCAS is set, otherwise to rw_upscaled_output; alpha is 1.
+#if FFX_FSR1_OPTION_APPLY_RCAS
+ #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+ void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat16x3 fColor)
+ {
+ imageStore(rw_internal_upscaled_color, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
+ }
+ #endif // #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+#else
+ #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+ void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat16x3 fColor)
+ {
+ imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
+ }
+ #endif // #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+#endif // #if FFX_FSR1_OPTION_APPLY_RCAS
+
+#if defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+ FfxFloat16x4 LoadRCas_Input(FfxInt16x2 iPxPos) // Fetches mip 0 of r_internal_upscaled_color at iPxPos, converted to FfxFloat16x4.
+ {
+ return FfxFloat16x4(texelFetch(r_internal_upscaled_color, FfxInt32x2(iPxPos), 0));
+ }
+#endif // defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+
+#if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+ void StoreRCasOutputHx2(FfxInt16x2 iPxPos, FfxFloat16x2 fColorR, FfxFloat16x2 fColorG, FfxFloat16x2 fColorB, FfxFloat16x2 fColorA) // Stores two packed pixels: the .x lanes at iPxPos, the .y lanes 8 texels to the right.
+ {
+ imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColorR.x, fColorG.x, fColorB.x, fColorA.x));
+ iPxPos.x += FfxInt16(8);
+ imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColorR.y, fColorG.y, fColorB.y, fColorA.y));
+ }
+
+#endif // defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+
+#else // FFX_HALF
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat32x4 GatherEasuRed(FfxFloat32x2 fPxPos) // Gathers component 0 of r_input_color at fPxPos.
+ {
+ return textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 0);
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat32x4 GatherEasuGreen(FfxFloat32x2 fPxPos) // Gathers component 1 of r_input_color at fPxPos.
+ {
+ return textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 1);
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat32x4 GatherEasuBlue(FfxFloat32x2 fPxPos) // Gathers component 2 of r_input_color at fPxPos.
+ {
+ return textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 2);
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+// StoreEASUOutput writes to rw_internal_upscaled_color when FFX_FSR1_OPTION_APPLY_RCAS is set, otherwise to rw_upscaled_output; alpha is 1.
+#if FFX_FSR1_OPTION_APPLY_RCAS
+ #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+ void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
+ {
+ imageStore(rw_internal_upscaled_color, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
+ }
+ #endif // #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+#else
+ #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+ void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
+ {
+ imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
+ }
+ #endif // #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+#endif // #if FFX_FSR1_OPTION_APPLY_RCAS
+
+#if defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+ FfxFloat32x4 LoadRCas_Input(FfxInt32x2 iPxPos) // Fetches mip 0 of r_internal_upscaled_color at iPxPos.
+ {
+ return texelFetch(r_internal_upscaled_color, iPxPos, 0);
+ }
+#endif // defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+
+#if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+ void StoreRCasOutput(FfxInt32x2 iPxPos, FfxFloat32x4 fColor) // Stores fColor unchanged to rw_upscaled_output at iPxPos.
+ {
+ imageStore(rw_upscaled_output, iPxPos, fColor);
+ }
+ // Three-component overload: forwards to the four-component version with alpha set to 1.
+ void StoreRCasOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor)
+ {
+ StoreRCasOutput(iPxPos, FfxFloat32x4(fColor, 1.f));
+ }
+#endif // defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+
+#endif // FFX_HALF
+
+#endif // #if defined(FFX_GPU)
diff --git a/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_hlsl.h
new file mode 100644
index 000000000000..03e609337fcd
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_hlsl.h
@@ -0,0 +1,257 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_fsr1_resources.h"
+
+#if defined(FFX_GPU)
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic push
+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
+#endif //__hlsl_dx_compiler
+#include "ffx_core.h"
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic pop
+#endif //__hlsl_dx_compiler
+
+#ifndef FFX_PREFER_WAVE64
+#define FFX_PREFER_WAVE64
+#endif // #ifndef FFX_PREFER_WAVE64
+
+#pragma warning(disable: 3205) // conversion from larger type to smaller
+
+#define DECLARE_SRV_REGISTER(regIndex) t##regIndex // pastes the index onto the SRV register prefix 't'
+#define DECLARE_UAV_REGISTER(regIndex) u##regIndex // pastes the index onto the UAV register prefix 'u'
+#define DECLARE_CB_REGISTER(regIndex) b##regIndex // pastes the index onto the constant-buffer register prefix 'b'
+#define FFX_FSR1_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) // wrapper level lets a macro argument expand before the ## paste
+#define FFX_FSR1_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) // same two-level scheme for UAVs
+#define FFX_FSR1_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) // same two-level scheme for constant buffers
+
+#if defined(FSR1_BIND_CB_FSR1)
+ cbuffer cbFSR1 : FFX_FSR1_DECLARE_CB(FSR1_BIND_CB_FSR1)
+ {
+ FfxUInt32x4 const0; // returned by both Const0() and RCasConfig()
+ FfxUInt32x4 const1;
+ FfxUInt32x4 const2;
+ FfxUInt32x4 const3;
+ FfxUInt32x4 sample; // returned by both EASUSample() and RCasSample()
+ #define FFX_FSR1_CONSTANT_BUFFER_1_SIZE 20 // Number of 32-bit values (5 x 4-component vectors). This must be kept in sync with the cbFSR1 size.
+ };
+#else // No constant buffer bound: the accessor functions compile against literal zeros instead.
+ #define const0 0
+ #define const1 0
+ #define const2 0
+ #define const3 0
+ #define sample 0
+#endif // defined(FSR1_BIND_CB_FSR1)
+
+#if defined(FFX_GPU) // Nested inside the FFX_GPU guard opened after the resources include, so this is always true here.
+#define FFX_FSR1_ROOTSIG_STRINGIFY(p) FFX_FSR1_ROOTSIG_STR(p) // expands p first, then stringifies it
+#define FFX_FSR1_ROOTSIG_STR(p) #p
+#define FFX_FSR1_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR1_ROOTSIG_STRINGIFY(FFX_FSR1_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR1_ROOTSIG_STRINGIFY(FFX_FSR1_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "CBV(b0), " \
+ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+
+#if defined(FFX_FSR1_EMBED_ROOTSIG) // Opt-in: the attribute is only emitted when the including shader defines this macro.
+#define FFX_FSR1_EMBED_ROOTSIG_CONTENT FFX_FSR1_ROOTSIG
+#else
+#define FFX_FSR1_EMBED_ROOTSIG_CONTENT
+#endif // #if defined(FFX_FSR1_EMBED_ROOTSIG)
+#endif // #if defined(FFX_GPU)
+
+
+FfxUInt32x4 Const0() // First of the four constant vectors passed to the EASU filter.
+{
+ return const0;
+}
+
+FfxUInt32x4 Const1() // Second EASU constant vector.
+{
+ return const1;
+}
+
+FfxUInt32x4 Const2() // Third EASU constant vector.
+{
+ return const2;
+}
+
+FfxUInt32x4 Const3() // Fourth EASU constant vector.
+{
+ return const3;
+}
+
+FfxUInt32x4 EASUSample() // Sample settings as read by the EASU pass.
+{
+ return sample;
+}
+
+FfxUInt32x4 RCasSample() // Sample settings as read by the RCAS pass; same field as EASUSample().
+{
+ return sample;
+}
+
+FfxUInt32x4 RCasConfig() // RCAS configuration vector.
+{
+ return const0; // shares the const0 slot with Const0()
+}
+
+SamplerState s_LinearClamp : register(s0);
+
+ // SRVs: read-only views, declared only when the matching bind macro supplies a register index.
+ #if defined FSR1_BIND_SRV_INPUT_COLOR
+ Texture2D<FfxFloat32x4> r_input_color : FFX_FSR1_DECLARE_SRV(FSR1_BIND_SRV_INPUT_COLOR);
+ #endif
+ #if defined FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR
+ Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR1_DECLARE_SRV(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR);
+ #endif
+ #if defined FSR1_BIND_SRV_UPSCALED_OUTPUT
+ Texture2D<FfxFloat32x4> r_upscaled_output : FFX_FSR1_DECLARE_SRV(FSR1_BIND_SRV_UPSCALED_OUTPUT);
+ #endif
+
+ // UAV declarations: writable views. RWTexture2D requires an explicit element type; FfxFloat32x4 matches the values the store callbacks assign.
+ #if defined FSR1_BIND_UAV_INPUT_COLOR
+ RWTexture2D<FfxFloat32x4> rw_input_color : FFX_FSR1_DECLARE_UAV(FSR1_BIND_UAV_INPUT_COLOR);
+ #endif
+ #if defined FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR
+ RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR1_DECLARE_UAV(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR);
+ #endif
+ #if defined FSR1_BIND_UAV_UPSCALED_OUTPUT
+ RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR1_DECLARE_UAV(FSR1_BIND_UAV_UPSCALED_OUTPUT);
+ #endif
+
+#if FFX_HALF
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat16x4 GatherEasuRed(FfxFloat32x2 fPxPos) // EASU callback (half precision): gathered red channel.
+ {
+ return (FfxFloat16x4)r_input_color.GatherRed(s_LinearClamp, fPxPos, FfxInt32x2(0,0)); // zero texel offset; result cast to 16-bit floats
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat16x4 GatherEasuGreen(FfxFloat32x2 fPxPos) // EASU callback (half precision): gathered green channel.
+ {
+ return (FfxFloat16x4)r_input_color.GatherGreen(s_LinearClamp, fPxPos, FfxInt32x2(0, 0)); // zero texel offset; result cast to 16-bit floats
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat16x4 GatherEasuBlue(FfxFloat32x2 fPxPos) // EASU callback (half precision): gathered blue channel.
+ {
+ return (FfxFloat16x4)r_input_color.GatherBlue(s_LinearClamp, fPxPos, FfxInt32x2(0, 0)); // zero texel offset; result cast to 16-bit floats
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if FFX_FSR1_OPTION_APPLY_RCAS // RCAS enabled: EASU writes rw_internal_upscaled_color rather than the final output.
+ #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+ void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat16x3 fColor)
+ {
+ rw_internal_upscaled_color[iPxPos] = FfxFloat32x4(fColor, 1.f); // widened to 32-bit floats; alpha is always stored as 1
+ }
+ #endif // #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+#else // RCAS disabled: EASU writes straight to rw_upscaled_output.
+ #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+ void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat16x3 fColor)
+ {
+ rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); // widened to 32-bit floats; alpha is always stored as 1
+ }
+ #endif // #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+#endif // #if FFX_FSR1_OPTION_APPLY_RCAS
+
+#if defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+ FfxFloat16x4 LoadRCas_Input(FfxInt16x2 iPxPos) // RCAS callback (half precision): direct texel read of the internal upscaled color.
+ {
+ return (FfxFloat16x4)r_internal_upscaled_color[iPxPos]; // result cast to 16-bit floats
+ }
+#endif // defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+
+#if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+ void StoreRCasOutputHx2(FfxInt16x2 iPxPos, FfxFloat16x2 fColorR, FfxFloat16x2 fColorG, FfxFloat16x2 fColorB, FfxFloat16x2 fColorA) // Stores two packed pixels: each argument holds one channel for both.
+ {
+ rw_upscaled_output[iPxPos] = FfxFloat32x4(fColorR.x, fColorG.x, fColorB.x, fColorA.x); // first pixel: the .x lanes
+ iPxPos.x += 8; // second pixel sits 8 columns to the right
+ rw_upscaled_output[iPxPos] = FfxFloat32x4(fColorR.y, fColorG.y, fColorB.y, fColorA.y); // second pixel: the .y lanes
+ }
+#endif // defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+
+#else // FFX_HALF
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat32x4 GatherEasuRed(FfxFloat32x2 fPxPos) // EASU callback (full precision): gathered red channel.
+ {
+ return r_input_color.GatherRed(s_LinearClamp, fPxPos, FfxInt32x2(0, 0)); // zero texel offset
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat32x4 GatherEasuGreen(FfxFloat32x2 fPxPos) // EASU callback (full precision): gathered green channel.
+ {
+ return r_input_color.GatherGreen(s_LinearClamp, fPxPos, FfxInt32x2(0, 0)); // zero texel offset
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+ FfxFloat32x4 GatherEasuBlue(FfxFloat32x2 fPxPos) // EASU callback (full precision): gathered blue channel.
+ {
+ return r_input_color.GatherBlue(s_LinearClamp, fPxPos, FfxInt32x2(0, 0)); // zero texel offset
+ }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+
+#if FFX_FSR1_OPTION_APPLY_RCAS // RCAS enabled: EASU writes rw_internal_upscaled_color rather than the final output.
+ #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+ void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
+ {
+ rw_internal_upscaled_color[iPxPos] = FfxFloat32x4(fColor, 1.f); // alpha is always stored as 1
+ }
+ #endif // #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+#else // RCAS disabled: EASU writes straight to rw_upscaled_output.
+ #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+ void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
+ {
+ rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); // alpha is always stored as 1
+ }
+ #endif // #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+#endif // #if FFX_FSR1_OPTION_APPLY_RCAS
+
+#if defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+ FfxFloat32x4 LoadRCas_Input(FfxInt32x2 iPxPos) // RCAS callback (full precision): direct texel read of the internal upscaled color.
+ {
+ return r_internal_upscaled_color[iPxPos];
+ }
+#endif // defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+
+#if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+ void StoreRCasOutput(FfxInt32x2 iPxPos, FfxFloat32x4 fColor) // Writes fColor, including its alpha, to rw_upscaled_output.
+ {
+ rw_upscaled_output[iPxPos] = fColor;
+ }
+#endif // defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+
+#endif // FFX_HALF
+
+#endif // #if defined(FFX_GPU)
diff --git a/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_easu.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_easu.h
new file mode 100644
index 000000000000..0aebee2ce8a7
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_easu.h
@@ -0,0 +1,98 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#define GROUP_SIZE 8
+#define FSR_RCAS_DENOISE 1
+
+#include "../ffx_core.h"
+
+#if FFX_HALF // Selects the 16-bit EASU variant; the callbacks below forward to the Gather* functions of the callbacks header.
+
+ #define FFX_FSR_EASU_HALF 1
+ FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p) { return GatherEasuRed(p); }
+ FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p) { return GatherEasuGreen(p); }
+ FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p) { return GatherEasuBlue(p); }
+
+#else // 32-bit EASU variant.
+
+ #define FFX_FSR_EASU_FLOAT 1
+ FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p) { return GatherEasuRed(p); }
+ FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p) { return GatherEasuGreen(p); }
+ FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p) { return GatherEasuBlue(p); }
+
+#endif // FFX_HALF
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+ #define FSR_RCAS_PASSTHROUGH_ALPHA
+#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+
+#include "ffx_fsr1.h"
+
+void CurrFilter(FfxUInt32x2 pos) // Runs the EASU filter for destination pixel pos and stores the result there.
+{
+#if FFX_HALF
+
+ FfxFloat16x3 c;
+ FsrEasuH(c, pos, Const0(), Const1(), Const2(), Const3());
+ if (EASUSample().x == 1) // NOTE(review): presumably undoes a square-root encoding of the input; confirm against the host code that fills 'sample'
+ {
+ c *= c; // component-wise square
+ }
+
+#if FFX_FSR1_OPTION_SRGB_CONVERSIONS
+ // Apply gamma if this is an sRGB format (auto-degamma'd on sampler read)
+ c = pow(c, FfxFloat16x3(1.0 / 2.2, 1.0 / 2.2, 1.0 / 2.2));
+#endif // FFX_FSR1_OPTION_SRGB_CONVERSIONS
+
+ StoreEASUOutput(pos, c);
+
+#else // !FFX_HALF: same steps with 32-bit floats
+
+ FfxFloat32x3 c;
+ ffxFsrEasuFloat(c, pos, Const0(), Const1(), Const2(), Const3());
+ if (EASUSample().x == 1) // NOTE(review): presumably undoes a square-root encoding of the input; confirm against the host code that fills 'sample'
+ {
+ c *= c; // component-wise square
+ }
+
+#if FFX_FSR1_OPTION_SRGB_CONVERSIONS
+ // Apply gamma if this is an sRGB format (auto-degamma'd on sampler read)
+ c = pow(c, FfxFloat32x3(1.f / 2.2f, 1.f / 2.2f, 1.f / 2.2f));
+#endif // FFX_FSR1_OPTION_SRGB_CONVERSIONS
+
+ StoreEASUOutput(pos, c);
+
+#endif // FFX_HALF
+}
+
+void EASU(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) // EASU pass body: each thread filters four pixels. Dtid is not read here.
+{
+ // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+ FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); // << 4u: workgroup origin is WorkGroupId * 16
+ CurrFilter(gxy); // (x, y)
+ gxy.x += 8u;
+ CurrFilter(gxy); // (x + 8, y)
+ gxy.y += 8u;
+ CurrFilter(gxy); // (x + 8, y + 8)
+ gxy.x -= 8u;
+ CurrFilter(gxy); // (x, y + 8)
+}
diff --git a/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_rcas.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_rcas.h
new file mode 100644
index 000000000000..60b3d3cafc3d
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_rcas.h
@@ -0,0 +1,116 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#define GROUP_SIZE 8
+#define FSR_RCAS_DENOISE 1
+
+#include "../ffx_core.h"
+
+#if FFX_HALF // Selects the packed 16-bit RCAS variant.
+ #define FSR_RCAS_HX2 1
+ FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p) // RCAS load callback: forwards to LoadRCas_Input.
+ {
+ return LoadRCas_Input(p);
+ }
+ void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b) {} // input hook left empty: loaded values are used unchanged
+#else // 32-bit RCAS variant.
+ #define FSR_RCAS_F 1
+ FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) // RCAS load callback: forwards to LoadRCas_Input.
+ {
+ return LoadRCas_Input(p);
+ }
+ void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} // input hook left empty: loaded values are used unchanged
+#endif // FFX_HALF
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+ #define FSR_RCAS_PASSTHROUGH_ALPHA
+#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+
+#include "ffx_fsr1.h"
+
+void CurrFilter(FFX_MIN16_U2 pos) // Runs RCAS at pos and stores the result (two packed pixels in the FFX_HALF path).
+{
+#if FFX_HALF
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+ FfxFloat16x2 cr, cg, cb, ca;
+ FsrRcasHx2(cr, cg, cb, ca, pos, RCasConfig());
+#else
+ FfxFloat16x2 cr, cg, cb;
+ FsrRcasHx2(cr, cg, cb, pos, RCasConfig());
+#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+
+ if (RCasSample().x == 1) // squares the color channels only; ca is left untouched
+ {
+ cr *= cr;
+ cg *= cg;
+ cb *= cb;
+ }
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+ StoreRCasOutputHx2(FfxInt16x2(pos), cr, cg, cb, ca);
+#else
+ StoreRCasOutputHx2(FfxInt16x2(pos), cr, cg, cb, FfxFloat16x2(1.0, 1.0)); // no alpha passthrough: store alpha = 1 for both pixels
+#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+
+#else // !FFX_HALF: one pixel per call, 32-bit floats
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+ FfxFloat32x4 c;
+ FsrRcasF(c.r, c.g, c.b, c.a, pos, RCasConfig());
+#else
+ FfxFloat32x3 c;
+ FsrRcasF(c.r, c.g, c.b, pos, RCasConfig());
+#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+ if (RCasSample().x == 1) // NOTE(review): with passthrough alpha c has four components, so alpha is squared too, unlike the FFX_HALF path; confirm this is intended
+ {
+ c *= c;
+ }
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+ StoreRCasOutput(FfxInt32x2(pos), c);
+#else
+ StoreRCasOutput(FfxInt32x2(pos), FfxFloat32x4(c, 1.0)); // no alpha passthrough: store alpha = 1
+#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+
+#endif // FFX_HALF
+}
+
+void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) // RCAS pass body. Dtid is not read here.
+{
+ // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+ FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); // << 4u: workgroup origin is WorkGroupId * 16
+#if FFX_HALF
+ // The packed version processes the left and right 8x8 tiles in one call, a 16x8 region in total.
+ CurrFilter(FFX_MIN16_U2(gxy)); // (x, y), plus the packed pixel 8 columns to the right
+ gxy.y += 8u;
+ CurrFilter(FFX_MIN16_U2(gxy)); // (x, y + 8), plus the packed pixel 8 columns to the right
+#else // !FFX_HALF: four single-pixel calls
+ CurrFilter(FFX_MIN16_U2(gxy)); // (x, y)
+ gxy.x += 8u;
+ CurrFilter(FFX_MIN16_U2(gxy)); // (x + 8, y)
+ gxy.y += 8u;
+ CurrFilter(FFX_MIN16_U2(gxy)); // (x + 8, y + 8)
+ gxy.x -= 8u;
+ CurrFilter(FFX_MIN16_U2(gxy)); // (x, y + 8)
+#endif // FFX_HALF
+}
diff --git a/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_resources.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_resources.h
new file mode 100644
index 000000000000..eb74d7433d75
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_resources.h
@@ -0,0 +1,38 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR1_RESOURCES_H
+#define FFX_FSR1_RESOURCES_H
+
+#if defined(FFX_CPU) || defined(FFX_GPU) // Defined for both host and shader builds.
+#define FFX_FSR1_RESOURCE_IDENTIFIER_NULL 0
+#define FFX_FSR1_RESOURCE_IDENTIFIER_INPUT_COLOR 1
+#define FFX_FSR1_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 2
+#define FFX_FSR1_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 3
+
+#define FFX_FSR1_RESOURCE_IDENTIFIER_COUNT 4 // Number of identifiers above, NULL included; keep in sync when adding one.
+
+#define FFX_FSR1_CONSTANTBUFFER_IDENTIFIER_FSR1 0
+
+#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
+
+#endif //!defined( FFX_FSR1_RESOURCES_H )
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_accumulate.h
similarity index 95%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_accumulate.h
index 7bd5892cb90f..71b9d26c10c4 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_accumulate.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -35,7 +36,7 @@ FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector)
void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight)
{
- // Aviod invalid values when accumulation and upsampled weight is 0
+ // Avoid invalid values when accumulation and upsampled weight is 0
fAccumulation = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www);
#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
@@ -65,8 +66,8 @@ void RectifyHistory(
{
FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f));
- const FfxFloat32 fVecolityFactor = ffxSaturate(params.fHrVelocity / 20.0f);
- const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVecolityFactor));
+ const FfxFloat32 fHrVelocityFactor = ffxSaturate(params.fHrVelocity / 20.0f);
+ const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fHrVelocityFactor));
FfxFloat32 fBoxScale = ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT);
FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale;
@@ -157,8 +158,7 @@ FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams param
FfxFloat32 fMin = abs(fDiffs0);
- if (fMin >= fUnormThreshold)
- {
+ if (fMin >= fUnormThreshold) {
for (int i = N_MINUS_2; i <= N_MINUS_4; i++) {
FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]);
@@ -170,7 +170,7 @@ FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams param
}
}
- const FfxFloat32 fBoxSize = clippingBox.boxVec.x;
+ const FfxFloat32 fBoxSize = clippingBox.boxVec.x;
const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f);
fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor;
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_glsl.h
similarity index 66%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_glsl.h
index b610037cc673..1b2e11307ed1 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_glsl.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -18,20 +19,19 @@
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
+
#include "ffx_fsr2_resources.h"
#if defined(FFX_GPU)
-#include "ffx_core.h"
-#endif // #if defined(FFX_GPU)
+#include "../ffx_core.h"
-#if defined(FFX_GPU)
-#ifndef FFX_FSR2_PREFER_WAVE64
-#define FFX_FSR2_PREFER_WAVE64
-#endif // #if defined(FFX_GPU)
+#ifndef FFX_PREFER_WAVE64
+#define FFX_PREFER_WAVE64
+#endif // #ifndef FFX_PREFER_WAVE64
#if defined(FSR2_BIND_CB_FSR2)
- layout (set = 1, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t
- {
+ layout (set = 0, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t
+ {
FfxInt32x2 iRenderSize;
FfxInt32x2 iMaxRenderSize;
FfxInt32x2 iDisplaySize;
@@ -53,247 +53,359 @@
FfxFloat32 fDynamicResChangeFactor;
FfxFloat32 fViewSpaceToMetersFactor;
+ // GODOT BEGINS
FfxFloat32 fPad;
mat4 mReprojectionMatrix;
+ // GODOT ENDS
} cbFSR2;
-#endif
+
FfxInt32x2 RenderSize()
{
- return cbFSR2.iRenderSize;
+ return cbFSR2.iRenderSize;
}
FfxInt32x2 MaxRenderSize()
{
- return cbFSR2.iMaxRenderSize;
+ return cbFSR2.iMaxRenderSize;
}
FfxInt32x2 DisplaySize()
{
- return cbFSR2.iDisplaySize;
+ return cbFSR2.iDisplaySize;
}
FfxInt32x2 InputColorResourceDimensions()
{
- return cbFSR2.iInputColorResourceDimensions;
+ return cbFSR2.iInputColorResourceDimensions;
}
FfxInt32x2 LumaMipDimensions()
{
- return cbFSR2.iLumaMipDimensions;
+ return cbFSR2.iLumaMipDimensions;
}
FfxInt32 LumaMipLevelToUse()
{
- return cbFSR2.iLumaMipLevelToUse;
+ return cbFSR2.iLumaMipLevelToUse;
}
FfxInt32 FrameIndex()
{
- return cbFSR2.iFrameIndex;
+ return cbFSR2.iFrameIndex;
}
FfxFloat32x4 DeviceToViewSpaceTransformFactors()
{
- return cbFSR2.fDeviceToViewDepth;
+ return cbFSR2.fDeviceToViewDepth;
}
FfxFloat32x2 Jitter()
{
- return cbFSR2.fJitter;
+ return cbFSR2.fJitter;
}
FfxFloat32x2 MotionVectorScale()
{
- return cbFSR2.fMotionVectorScale;
+ return cbFSR2.fMotionVectorScale;
}
FfxFloat32x2 DownscaleFactor()
{
- return cbFSR2.fDownscaleFactor;
+ return cbFSR2.fDownscaleFactor;
}
FfxFloat32x2 MotionVectorJitterCancellation()
{
- return cbFSR2.fMotionVectorJitterCancellation;
+ return cbFSR2.fMotionVectorJitterCancellation;
}
FfxFloat32 PreExposure()
{
- return cbFSR2.fPreExposure;
+ return cbFSR2.fPreExposure;
}
FfxFloat32 PreviousFramePreExposure()
{
- return cbFSR2.fPreviousFramePreExposure;
+ return cbFSR2.fPreviousFramePreExposure;
}
FfxFloat32 TanHalfFoV()
{
- return cbFSR2.fTanHalfFOV;
+ return cbFSR2.fTanHalfFOV;
}
FfxFloat32 JitterSequenceLength()
{
- return cbFSR2.fJitterSequenceLength;
+ return cbFSR2.fJitterSequenceLength;
}
FfxFloat32 DeltaTime()
{
- return cbFSR2.fDeltaTime;
+ return cbFSR2.fDeltaTime;
}
FfxFloat32 DynamicResChangeFactor()
{
- return cbFSR2.fDynamicResChangeFactor;
+ return cbFSR2.fDynamicResChangeFactor;
}
FfxFloat32 ViewSpaceToMetersFactor()
{
- return cbFSR2.fViewSpaceToMetersFactor;
+ return cbFSR2.fViewSpaceToMetersFactor;
+}
+
+#endif // #if defined(FSR2_BIND_CB_FSR2)
+
+
+#ifdef FSR2_BIND_CB_AUTOREACTIVE
+layout(set = 0, binding = FSR2_BIND_CB_AUTOREACTIVE, std140) uniform cbGenerateReactive_t // NOTE(review): the FSR2_BIND_CB_REACTIVE buffer in this file reuses these block and instance names, so the two would clash if both macros were defined
+{
+ FfxFloat32 fTcThreshold;
+ FfxFloat32 fTcScale;
+ FfxFloat32 fReactiveScale;
+ FfxFloat32 fReactiveMax;
+} cbGenerateReactive;
+
+FfxFloat32 TcThreshold() // Accessor for cbGenerateReactive.fTcThreshold.
+{
+ return cbGenerateReactive.fTcThreshold;
+}
+
+FfxFloat32 TcScale() // Accessor for cbGenerateReactive.fTcScale.
+{
+ return cbGenerateReactive.fTcScale;
+}
+
+FfxFloat32 ReactiveScale() // Accessor for cbGenerateReactive.fReactiveScale.
+{
+ return cbGenerateReactive.fReactiveScale;
+}
+
+FfxFloat32 ReactiveMax() // Accessor for cbGenerateReactive.fReactiveMax.
+{
+ return cbGenerateReactive.fReactiveMax;
+}
+#endif // #ifdef FSR2_BIND_CB_AUTOREACTIVE
+
+#if defined(FSR2_BIND_CB_RCAS)
+layout(set = 0, binding = FSR2_BIND_CB_RCAS, std140) uniform cbRCAS_t // Constant buffer for the RCAS pass; declared only when the bind macro is defined.
+{
+ FfxUInt32x4 rcasConfig;
+} cbRCAS;
+
+FfxUInt32x4 RCASConfig() // Accessor for cbRCAS.rcasConfig.
+{
+ return cbRCAS.rcasConfig;
+}
+#endif // #if defined(FSR2_BIND_CB_RCAS)
+
+
+#if defined(FSR2_BIND_CB_REACTIVE)
+layout(set = 0, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t // NOTE(review): same block and instance names as the FSR2_BIND_CB_AUTOREACTIVE buffer in this file; the two would clash if both macros were defined
+{
+ FfxFloat32 scale;
+ FfxFloat32 threshold;
+ FfxFloat32 binaryValue;
+ FfxUInt32 flags;
+} cbGenerateReactive;
+
+FfxFloat32 GenReactiveScale() // Accessor for cbGenerateReactive.scale.
+{
+ return cbGenerateReactive.scale;
+}
+
+FfxFloat32 GenReactiveThreshold() // Accessor for cbGenerateReactive.threshold.
+{
+ return cbGenerateReactive.threshold;
+}
+
+FfxFloat32 GenReactiveBinaryValue() // Accessor for cbGenerateReactive.binaryValue.
+{
+ return cbGenerateReactive.binaryValue;
+}
+
+FfxUInt32 GenReactiveFlags() // Accessor for cbGenerateReactive.flags.
+{
+ return cbGenerateReactive.flags;
+}
+#endif // #if defined(FSR2_BIND_CB_REACTIVE)
+
+
+#if defined(FSR2_BIND_CB_SPD)
+layout(set = 0, binding = FSR2_BIND_CB_SPD, std140) uniform cbSPD_t // Constant buffer exposed through the four accessors below; declared only when the bind macro is defined.
+{
+ FfxUInt32 mips;
+ FfxUInt32 numWorkGroups;
+ FfxUInt32x2 workGroupOffset;
+ FfxUInt32x2 renderSize;
+} cbSPD;
+
+FfxUInt32 MipCount() // Accessor for cbSPD.mips.
+{
+ return cbSPD.mips;
}
-layout (set = 0, binding = 0) uniform sampler s_PointClamp;
-layout (set = 0, binding = 1) uniform sampler s_LinearClamp;
+FfxUInt32 NumWorkGroups() // Accessor for cbSPD.numWorkGroups.
+{
+ return cbSPD.numWorkGroups;
+}
+
+FfxUInt32x2 WorkGroupOffset() // Accessor for cbSPD.workGroupOffset.
+{
+ return cbSPD.workGroupOffset;
+}
+
+FfxUInt32x2 SPD_RenderSize() // Accessor for cbSPD.renderSize.
+{
+ return cbSPD.renderSize;
+}
+#endif // #if defined(FSR2_BIND_CB_SPD)
+
+// GODOT BEGINS
+// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them.
+layout (set = 0, binding = 100 /*1000*/) uniform sampler s_PointClamp;
+layout (set = 0, binding = 101 /*1001*/) uniform sampler s_LinearClamp;
+// GODOT ENDS
// SRVs
#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
- layout (set = 1, binding = FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only;
+ layout (set = 0, binding = FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only;
#endif
#if defined(FSR2_BIND_SRV_INPUT_COLOR)
- layout (set = 1, binding = FSR2_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered;
+ layout (set = 0, binding = FSR2_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered;
#endif
#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
- layout (set = 1, binding = FSR2_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors;
+ layout (set = 0, binding = FSR2_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors;
#endif
#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
- layout (set = 1, binding = FSR2_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth;
+ layout (set = 0, binding = FSR2_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth;
#endif
#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
- layout (set = 1, binding = FSR2_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure;
+ layout (set = 0, binding = FSR2_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure;
#endif
#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
- layout(set = 1, binding = FSR2_BIND_SRV_AUTO_EXPOSURE) uniform texture2D r_auto_exposure;
+ layout(set = 0, binding = FSR2_BIND_SRV_AUTO_EXPOSURE) uniform texture2D r_auto_exposure;
#endif
#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
- layout (set = 1, binding = FSR2_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask;
+ layout (set = 0, binding = FSR2_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask;
#endif
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
- layout (set = 1, binding = FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) uniform texture2D r_transparency_and_composition_mask;
+ layout (set = 0, binding = FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) uniform texture2D r_transparency_and_composition_mask;
#endif
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
- layout (set = 1, binding = FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) uniform utexture2D r_reconstructed_previous_nearest_depth;
+ layout (set = 0, binding = FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) uniform utexture2D r_reconstructed_previous_nearest_depth;
#endif
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
- layout (set = 1, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors;
+ layout (set = 0, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors;
#endif
#if defined (FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
- layout(set = 1, binding = FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) uniform texture2D r_previous_dilated_motion_vectors;
+ layout(set = 0, binding = FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) uniform texture2D r_previous_dilated_motion_vectors;
#endif
#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
- layout (set = 1, binding = FSR2_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilatedDepth;
+ layout (set = 0, binding = FSR2_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilatedDepth;
#endif
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
- layout (set = 1, binding = FSR2_BIND_SRV_INTERNAL_UPSCALED) uniform texture2D r_internal_upscaled_color;
+ layout (set = 0, binding = FSR2_BIND_SRV_INTERNAL_UPSCALED) uniform texture2D r_internal_upscaled_color;
#endif
#if defined(FSR2_BIND_SRV_LOCK_STATUS)
- layout (set = 1, binding = FSR2_BIND_SRV_LOCK_STATUS) uniform texture2D r_lock_status;
+ layout (set = 0, binding = FSR2_BIND_SRV_LOCK_STATUS) uniform texture2D r_lock_status;
#endif
#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
- layout (set = 1, binding = FSR2_BIND_SRV_LOCK_INPUT_LUMA) uniform texture2D r_lock_input_luma;
+ layout (set = 0, binding = FSR2_BIND_SRV_LOCK_INPUT_LUMA) uniform texture2D r_lock_input_luma;
#endif
#if defined(FSR2_BIND_SRV_NEW_LOCKS)
- layout(set = 1, binding = FSR2_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks;
+ layout(set = 0, binding = FSR2_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks;
#endif
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
- layout (set = 1, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR) uniform texture2D r_prepared_input_color;
+ layout (set = 0, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR) uniform texture2D r_prepared_input_color;
#endif
#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
- layout (set = 1, binding = FSR2_BIND_SRV_LUMA_HISTORY) uniform texture2D r_luma_history;
+ layout (set = 0, binding = FSR2_BIND_SRV_LUMA_HISTORY) uniform texture2D r_luma_history;
#endif
#if defined(FSR2_BIND_SRV_RCAS_INPUT)
- layout (set = 1, binding = FSR2_BIND_SRV_RCAS_INPUT) uniform texture2D r_rcas_input;
+ layout (set = 0, binding = FSR2_BIND_SRV_RCAS_INPUT) uniform texture2D r_rcas_input;
#endif
#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
- layout (set = 1, binding = FSR2_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut;
+ layout (set = 0, binding = FSR2_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut;
#endif
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
- layout (set = 1, binding = FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) uniform texture2D r_imgMips;
+ layout (set = 0, binding = FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) uniform texture2D r_imgMips;
#endif
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
- layout (set = 1, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) uniform texture2D r_upsample_maximum_bias_lut;
+ layout (set = 0, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) uniform texture2D r_upsample_maximum_bias_lut;
#endif
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
- layout (set = 1, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks;
-#endif
+ layout (set = 0, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks;
+#endif
#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
- layout(set = 1, binding = FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha;
+ layout(set = 0, binding = FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha;
#endif
#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
- layout(set = 1, binding = FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha;
+ layout(set = 0, binding = FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha;
#endif
// UAV
#if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
- layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth;
+ layout (set = 0, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth;
#endif
#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
- layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors;
+ layout (set = 0, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors;
#endif
#if defined FSR2_BIND_UAV_DILATED_DEPTH
- layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) writeonly uniform image2D rw_dilatedDepth;
+ layout (set = 0, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) writeonly uniform image2D rw_dilatedDepth;
#endif
#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
- layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color;
+ layout (set = 0, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color;
#endif
#if defined FSR2_BIND_UAV_LOCK_STATUS
- layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, rg16f) uniform image2D rw_lock_status;
+ layout (set = 0, binding = FSR2_BIND_UAV_LOCK_STATUS, rg16f) uniform image2D rw_lock_status;
#endif
#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
- layout(set = 1, binding = FSR2_BIND_UAV_LOCK_INPUT_LUMA, r16f) writeonly uniform image2D rw_lock_input_luma;
+ layout(set = 0, binding = FSR2_BIND_UAV_LOCK_INPUT_LUMA, r16f) writeonly uniform image2D rw_lock_input_luma;
#endif
#if defined FSR2_BIND_UAV_NEW_LOCKS
- layout(set = 1, binding = FSR2_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks;
+ layout(set = 0, binding = FSR2_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks;
#endif
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
- layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) writeonly uniform image2D rw_prepared_input_color;
+ layout (set = 0, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) writeonly uniform image2D rw_prepared_input_color;
#endif
#if defined FSR2_BIND_UAV_LUMA_HISTORY
- layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history;
+ layout (set = 0, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history;
#endif
#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
- layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output;
+ layout (set = 0, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output;
#endif
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
- layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change;
+ layout (set = 0, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change;
#endif
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
- layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D rw_img_mip_5;
+ layout (set = 0, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D rw_img_mip_5;
#endif
#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
- layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8) writeonly uniform image2D rw_dilated_reactive_masks;
-#endif
-#if defined FSR2_BIND_UAV_EXPOSURE
- layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure;
+ layout (set = 0, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8) writeonly uniform image2D rw_dilated_reactive_masks;
+#endif
+#if defined FSR2_BIND_UAV_EXPOSURE
+ layout (set = 0, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure;
#endif
#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
- layout(set = 1, binding = FSR2_BIND_UAV_AUTO_EXPOSURE, rg32f) uniform image2D rw_auto_exposure;
+ layout(set = 0, binding = FSR2_BIND_UAV_AUTO_EXPOSURE, rg32f) uniform image2D rw_auto_exposure;
#endif
-#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
- layout (set = 1, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic;
+#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+ layout (set = 0, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic;
#endif
#if defined FSR2_BIND_UAV_AUTOREACTIVE
- layout(set = 1, binding = FSR2_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive;
+ layout(set = 0, binding = FSR2_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive;
#endif
#if defined FSR2_BIND_UAV_AUTOCOMPOSITION
- layout(set = 1, binding = FSR2_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition;
+ layout(set = 0, binding = FSR2_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition;
#endif
#if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR
- layout(set = 1, binding = FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_pre_alpha;
+ layout(set = 0, binding = FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_pre_alpha;
#endif
#if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
- layout(set = 1, binding = FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha;
+ layout(set = 0, binding = FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha;
#endif
#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
@@ -317,14 +429,16 @@ FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos)
}
#endif
-#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
+#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos)
{
+ // GODOT BEGINS
#if FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP
return min(texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r, 0.9f);
#else
- return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r;
+ return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r;
#endif
+ // GODOT ENDS
}
#endif
@@ -361,15 +475,17 @@ FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos)
{
FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy;
+ // GODOT BEGINS
#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
bool bInvalidMotionVector = all(lessThanEqual(fSrcMotionVector, vec2(-1.0f, -1.0f)));
if (bInvalidMotionVector)
{
FfxFloat32 fSrcDepth = LoadInputDepth(iPxDilatedMotionVectorPos);
FfxFloat32x2 fUv = (iPxDilatedMotionVectorPos + FfxFloat32(0.5)) / RenderSize();
- fSrcMotionVector = FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR2.mReprojectionMatrix);
+ fSrcMotionVector = FFX_FSR_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR2.mReprojectionMatrix);
}
#endif
+ // GODOT ENDS
FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
@@ -402,6 +518,15 @@ FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
}
#endif
+FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos)
+{
+#if defined(FSR2_BIND_SRV_RCAS_INPUT)
+ return texelFetch(r_rcas_input, iPxPos, 0);
+#else
+ return FfxFloat32x4(0.0);
+#endif
+}
+
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory)
{
@@ -533,12 +658,12 @@ void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue)
#if defined(FSR2_BIND_UAV_DILATED_DEPTH)
void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
{
- //FfxUInt32 uDepth = f32tof16(fDepth);
+ //FfxUInt32 uDepth = ffxF32ToF16(fDepth);
imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f));
}
#endif
-#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS)
+#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS)
void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
{
imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0f, 0.0f));
@@ -607,7 +732,7 @@ FfxFloat32 AutoExposure()
FfxFloat32 SampleLanczos2Weight(FfxFloat32 x)
{
#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
- return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0f, 0.5f), 0.0f).x;
+ return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0f, 0.5f), 0.0f).x;
#else
return 0.f;
#endif
@@ -642,13 +767,6 @@ void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER
}
#endif
-#if defined(FFX_INTERNAL)
-FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV)
-{
- return textureLod(sampler2D(r_debug_out, s_LinearClamp), fUV, 0.0f).rgba;
-}
-#endif
-
#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
@@ -695,4 +813,69 @@ void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN F
}
#endif
+FfxFloat32x2 SPD_LoadExposureBuffer()
+{
+#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+ return imageLoad(rw_auto_exposure, ivec2(0, 0)).xy;
+#else
+ return FfxFloat32x2(0.f, 0.f);
+#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+}
+
+void SPD_SetExposureBuffer(FfxFloat32x2 value)
+{
+#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+ imageStore(rw_auto_exposure, ivec2(0, 0), vec4(value, 0.0f, 0.0f));
+#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+}
+
+FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos)
+{
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ return FfxFloat32x4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f);
+#else
+ return FfxFloat32x4(0.f, 0.f, 0.f, 0.f);
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+}
+
+void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxUInt32 slice, FfxFloat32 value)
+{
+ switch (slice)
+ {
+ case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL:
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+ imageStore(rw_img_mip_shading_change, iPxPos, FfxFloat32x4(value, 0.0f, 0.0f, 0.0f));
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+ break;
+ case 5:
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ imageStore(rw_img_mip_5, iPxPos, FfxFloat32x4(value, 0.0f, 0.0f, 0.0f));
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ break;
+ default:
+
+ // avoid flattened side effect
+#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
+ imageStore(rw_img_mip_shading_change, iPxPos, FfxFloat32x4(imageLoad(rw_img_mip_shading_change, iPxPos).x, 0.0f, 0.0f, 0.0f));
+#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
+ imageStore(rw_img_mip_5, iPxPos, FfxFloat32x4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f));
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+ break;
+ }
+}
+
+void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter)
+{
+#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+ spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0, 0), 1);
+#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+}
+
+void SPD_ResetAtomicCounter()
+{
+#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+ imageStore(rw_spd_global_atomic, ivec2(0, 0), uvec4(0));
+#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+}
+
#endif // #if defined(FFX_GPU)
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_hlsl.h
similarity index 64%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_hlsl.h
index fd722b307e2f..8daafea000e6 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_hlsl.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -26,20 +27,16 @@
#pragma dxc diagnostic push
#pragma dxc diagnostic ignored "-Wambig-lit-shift"
#endif //__hlsl_dx_compiler
-#include "ffx_core.h"
+#include "../ffx_core.h"
#ifdef __hlsl_dx_compiler
#pragma dxc diagnostic pop
#endif //__hlsl_dx_compiler
-#endif // #if defined(FFX_GPU)
-#if defined(FFX_GPU)
-#ifndef FFX_FSR2_PREFER_WAVE64
-#define FFX_FSR2_PREFER_WAVE64
-#endif // #if defined(FFX_GPU)
+#ifndef FFX_PREFER_WAVE64
+#define FFX_PREFER_WAVE64
+#endif // #ifndef FFX_PREFER_WAVE64
-#if defined(FFX_GPU)
#pragma warning(disable: 3205) // conversion from larger type to smaller
-#endif // #if defined(FFX_GPU)
#define DECLARE_SRV_REGISTER(regIndex) t##regIndex
#define DECLARE_UAV_REGISTER(regIndex) u##regIndex
@@ -48,7 +45,7 @@
#define FFX_FSR2_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex))
#define FFX_FSR2_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex))
-#if defined(FSR2_BIND_CB_FSR2) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_CB_FSR2)
cbuffer cbFSR2 : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2)
{
FfxInt32x2 iRenderSize;
@@ -71,56 +68,10 @@
FfxFloat32 fDeltaTime;
FfxFloat32 fDynamicResChangeFactor;
FfxFloat32 fViewSpaceToMetersFactor;
+ FfxFloat32 fPadding;
};
-#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR2 size.
-#endif
-
-#if defined(FFX_GPU)
-#define FFX_FSR2_ROOTSIG_STRINGIFY(p) FFX_FSR2_ROOTSIG_STR(p)
-#define FFX_FSR2_ROOTSIG_STR(p) #p
-#define FFX_FSR2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
- "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
- "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
- "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
- "addressU = TEXTURE_ADDRESS_CLAMP, " \
- "addressV = TEXTURE_ADDRESS_CLAMP, " \
- "addressW = TEXTURE_ADDRESS_CLAMP, " \
- "comparisonFunc = COMPARISON_NEVER, " \
- "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
- "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
- "addressU = TEXTURE_ADDRESS_CLAMP, " \
- "addressV = TEXTURE_ADDRESS_CLAMP, " \
- "addressW = TEXTURE_ADDRESS_CLAMP, " \
- "comparisonFunc = COMPARISON_NEVER, " \
- "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
-
-#define FFX_FSR2_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size.
-
-#define FFX_FSR2_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
- "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
- "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
- "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_2_SIZE) ", b1), " \
- "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
- "addressU = TEXTURE_ADDRESS_CLAMP, " \
- "addressV = TEXTURE_ADDRESS_CLAMP, " \
- "addressW = TEXTURE_ADDRESS_CLAMP, " \
- "comparisonFunc = COMPARISON_NEVER, " \
- "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
- "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
- "addressU = TEXTURE_ADDRESS_CLAMP, " \
- "addressV = TEXTURE_ADDRESS_CLAMP, " \
- "addressW = TEXTURE_ADDRESS_CLAMP, " \
- "comparisonFunc = COMPARISON_NEVER, " \
- "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
-#if defined(FFX_FSR2_EMBED_ROOTSIG)
-#define FFX_FSR2_EMBED_ROOTSIG_CONTENT FFX_FSR2_ROOTSIG
-#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR2_CB2_ROOTSIG
-#else
-#define FFX_FSR2_EMBED_ROOTSIG_CONTENT
-#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT
-#endif // #if FFX_FSR2_EMBED_ROOTSIG
-#endif // #if defined(FFX_GPU)
+#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE 32
/* Define getter functions in the order they are defined in the CB! */
FfxInt32x2 RenderSize()
@@ -217,66 +168,181 @@ FfxFloat32 ViewSpaceToMetersFactor()
{
return fViewSpaceToMetersFactor;
}
+#endif // #if defined(FSR2_BIND_CB_FSR2)
+#define FFX_FSR2_ROOTSIG_STRINGIFY(p) FFX_FSR2_ROOTSIG_STR(p)
+#define FFX_FSR2_ROOTSIG_STR(p) #p
+#define FFX_FSR2_ROOTSIG [RootSignature("DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "CBV(b0), " \
+ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+ "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+
+#define FFX_FSR2_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size.
+
+#define FFX_FSR2_CB2_ROOTSIG [RootSignature("DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "CBV(b0), " \
+ "CBV(b1), " \
+ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+ "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+
+#define FFX_FSR2_CONSTANT_BUFFER_3_SIZE 4 // Number of 32-bit values. This must be kept in sync with cbGenerateReactive size.
+
+#define FFX_FSR2_REACTIVE_ROOTSIG [RootSignature("DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "CBV(b0), " \
+ "CBV(b1), " \
+ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+ "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+
+#if defined(FFX_FSR2_EMBED_ROOTSIG)
+#define FFX_FSR2_EMBED_ROOTSIG_CONTENT FFX_FSR2_ROOTSIG
+#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR2_CB2_ROOTSIG
+#define FFX_FSR2_EMBED_ROOTSIG_REACTIVE_CONTENT FFX_FSR2_REACTIVE_ROOTSIG
+#else
+#define FFX_FSR2_EMBED_ROOTSIG_CONTENT
+#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT
+#define FFX_FSR2_EMBED_ROOTSIG_REACTIVE_CONTENT
+#endif // #if FFX_FSR2_EMBED_ROOTSIG
+
+#if defined(FSR2_BIND_CB_AUTOREACTIVE)
+cbuffer cbGenerateReactive : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_AUTOREACTIVE)
+{
+ FfxFloat32 fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels
+ FfxFloat32 fTcScale;
+ FfxFloat32 fReactiveScale;
+ FfxFloat32 fReactiveMax;
+};
+
+FfxFloat32 TcThreshold()
+{
+ return fTcThreshold;
+}
+
+FfxFloat32 TcScale()
+{
+ return fTcScale;
+}
+
+FfxFloat32 ReactiveScale()
+{
+ return fReactiveScale;
+}
+
+FfxFloat32 ReactiveMax()
+{
+ return fReactiveMax;
+}
+#endif // #if defined(FSR2_BIND_CB_AUTOREACTIVE)
+
+#if defined(FSR2_BIND_CB_RCAS)
+cbuffer cbRCAS : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_RCAS)
+{
+ FfxUInt32x4 rcasConfig;
+};
+
+FfxUInt32x4 RCASConfig()
+{
+ return rcasConfig;
+}
+#endif // #if defined(FSR2_BIND_CB_RCAS)
+
+
+#if defined(FSR2_BIND_CB_REACTIVE)
+cbuffer cbGenerateReactive : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_REACTIVE)
+{
+ FfxFloat32 gen_reactive_scale;
+ FfxFloat32 gen_reactive_threshold;
+ FfxFloat32 gen_reactive_binaryValue;
+ FfxUInt32 gen_reactive_flags;
+};
+
+FfxFloat32 GenReactiveScale()
+{
+ return gen_reactive_scale;
+}
+
+FfxFloat32 GenReactiveThreshold()
+{
+ return gen_reactive_threshold;
+}
+
+FfxFloat32 GenReactiveBinaryValue()
+{
+ return gen_reactive_binaryValue;
+}
+
+FfxUInt32 GenReactiveFlags()
+{
+ return gen_reactive_flags;
+}
+#endif // #if defined(FSR2_BIND_CB_REACTIVE)
+
+#if defined(FSR2_BIND_CB_SPD)
+cbuffer cbSPD : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_SPD) {
+
+ FfxUInt32 mips;
+ FfxUInt32 numWorkGroups;
+ FfxUInt32x2 workGroupOffset;
+ FfxUInt32x2 renderSize;
+};
+
+FfxUInt32 MipCount()
+{
+ return mips;
+}
+
+FfxUInt32 NumWorkGroups()
+{
+ return numWorkGroups;
+}
+
+FfxUInt32x2 WorkGroupOffset()
+{
+ return workGroupOffset;
+}
+
+FfxUInt32x2 SPD_RenderSize()
+{
+ return renderSize;
+}
+#endif // #if defined(FSR2_BIND_CB_SPD)
SamplerState s_PointClamp : register(s0);
SamplerState s_LinearClamp : register(s1);
-// SRVs
-#if defined(FFX_INTERNAL)
- Texture2D r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY);
- Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR);
- Texture2D r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS);
- Texture2D r_input_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH);
- Texture2D r_input_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE);
- Texture2D r_auto_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
- Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK);
- Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK);
- Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
- Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
- Texture2D r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS);
- Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
- Texture2D r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
- Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
- Texture2D r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
- Texture2D r_new_locks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
- Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
- Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
- Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
- Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
- Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE);
- Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT);
- Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
- Texture2D r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
- Texture2D r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);
-
- Texture2D r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
-
- // UAV declarations
- RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
- RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
- RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
- RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
- RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
- RWTexture2D rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
- RWTexture2D rw_new_locks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
- RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
- RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
- RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);
-
- globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE);
- globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5);
- RWTexture2D rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
- RWTexture2D rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
- globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT);
- RWTexture2D rw_debug_out : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
-
- RWTexture2D rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE);
- RWTexture2D rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION);
- RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
- RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);
-
-#else // #if defined(FFX_INTERNAL)
+ // SRVs
#if defined FSR2_BIND_SRV_INPUT_COLOR
Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR);
#endif
@@ -416,72 +482,79 @@ SamplerState s_LinearClamp : register(s1);
#if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR);
#endif
-#endif // #if defined(FFX_INTERNAL)
-#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel)
{
return r_imgMips.mips[mipLevel][iPxPos];
}
#endif
-#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel)
{
return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel);
}
#endif
-#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos)
{
return r_input_depth[iPxPos];
}
#endif
-#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV)
{
return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x;
}
#endif
-#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos)
{
return r_reactive_mask[iPxPos];
}
#endif
-#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
{
return r_transparency_and_composition_mask[iPxPos];
}
#endif
-#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos)
{
return r_input_color_jittered[iPxPos].rgb;
}
#endif
-#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
{
return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb;
}
#endif
-#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos)
{
return r_prepared_input_color[iPxPos].xyz;
}
+
+#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1)
+FFX_MIN16_F3 LoadPreparedInputColorHalf(FfxUInt32x2 iPxPos) // Loads r_prepared_input_color[iPxPos].xyz and converts it to FFX_MIN16_F3.
+{
+ return FFX_MIN16_F3(r_prepared_input_color[iPxPos].xyz);
+}
#endif
-#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) || defined(FFX_INTERNAL)
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos)
{
FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy;
@@ -496,49 +569,51 @@ FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos)
}
#endif
-#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory)
{
return r_internal_upscaled_color[iPxHistory];
}
#endif
-#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_LUMA_HISTORY)
void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
{
rw_luma_history[iPxPos] = fLumaHistory;
}
#endif
-#if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
{
return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0);
}
#endif
-#if defined(FFX_INTERNAL)
-FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV)
+FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos) // Reads r_rcas_input[iPxPos]; yields 0.0 when FSR2_BIND_SRV_RCAS_INPUT is not defined.
{
- return r_debug_out.SampleLevel(s_LinearClamp, fUV, 0).w;
-}
+#if defined(FSR2_BIND_SRV_RCAS_INPUT)
+ return r_rcas_input[iPxPos];
+#else
+ return 0.0;
#endif
+}
-#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory)
{
rw_internal_upscaled_color[iPxHistory] = fHistory;
}
#endif
-#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
{
rw_internal_upscaled_color[iPxPos] = fColorAndWeight;
}
#endif
-#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT)
void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
{
rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f);
@@ -547,70 +622,70 @@ void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
//LOCK_LIFETIME_REMAINING == 0
//Should make LockInitialLifetime() return a const 1.0f later
-#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_LOCK_STATUS)
FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos)
{
return r_lock_status[iPxPos];
}
#endif
-#if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_LOCK_STATUS)
void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus)
{
rw_lock_status[iPxPos] = fLockStatus;
}
#endif
-#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos)
{
return r_lock_input_luma[iPxPos];
}
#endif
-#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma)
{
rw_lock_input_luma[iPxPos] = fLuma;
}
#endif
-#if defined(FSR2_BIND_SRV_NEW_LOCKS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_NEW_LOCKS)
FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos)
{
return r_new_locks[iPxPos];
}
#endif
-#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_NEW_LOCKS)
FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos)
{
return rw_new_locks[iPxPos];
}
#endif
-#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_NEW_LOCKS)
void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock)
{
rw_new_locks[iPxPos] = newLock;
}
#endif
-#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR)
void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped)
{
rw_prepared_input_color[iPxPos] = fTonemapped;
}
#endif
-#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV)
{
return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w;
}
#endif
-#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_LOCK_STATUS)
FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
{
FfxFloat32x2 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0);
@@ -618,14 +693,14 @@ FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
}
#endif
-#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos)
{
return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]);
}
#endif
-#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth)
{
FfxUInt32 uDepth = asuint(fDepth);
@@ -638,35 +713,35 @@ void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth)
}
#endif
-#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue)
{
rw_reconstructed_previous_nearest_depth[iPxSample] = uValue;
}
#endif
-#if defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_DILATED_DEPTH)
void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
{
rw_dilatedDepth[iPxPos] = fDepth;
}
#endif
-#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS)
void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
{
rw_dilated_motion_vectors[iPxPos] = fMotionVector;
}
#endif
-#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput)
{
return r_dilated_motion_vectors[iPxInput].xy;
}
#endif
-#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput)
{
return r_previous_dilated_motion_vectors[iPxInput].xy;
@@ -678,14 +753,14 @@ FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv)
}
#endif
-#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput)
{
return r_dilatedDepth[iPxInput];
}
#endif
-#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
FfxFloat32 Exposure()
{
FfxFloat32 exposure = r_input_exposure[FfxUInt32x2(0, 0)].x;
@@ -698,7 +773,7 @@ FfxFloat32 Exposure()
}
#endif
-#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
FfxFloat32 AutoExposure()
{
FfxFloat32 exposure = r_auto_exposure[FfxUInt32x2(0, 0)].x;
@@ -713,14 +788,35 @@ FfxFloat32 AutoExposure()
FfxFloat32 SampleLanczos2Weight(FfxFloat32 x)
{
-#if defined(FSR2_BIND_SRV_LANCZOS_LUT) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0);
#else
return 0.f;
#endif
}
-#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL)
+#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1)
+
+FFX_MIN16_F SampleLanczos2Weight_NoValu(FFX_MIN16_F x) // Samples r_lanczos_lut with s_LinearClamp at mip 0 and returns the result as FFX_MIN16_F.
+{ // NOTE(review): unlike SampleLanczos2Weight, x is not divided by 2 here - presumably callers pre-scale it; confirm.
+#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
+ return FFX_MIN16_F(r_lanczos_lut.SampleLevel(s_LinearClamp, __XB_AsHalf(__XB_V_PACK_B32_F16(x, 0.5)), 0)); // presumably packs (x, 0.5) into a half2 coordinate - confirm against the Xbox intrinsics
+#else
+ return 0.0; // fallback when FSR2_BIND_SRV_LANCZOS_LUT is not defined
+#endif
+}
+
+FFX_MIN16_F SampleLanczos2Weight_NoValuNoA16(FfxFloat32 x) // Same lookup as SampleLanczos2Weight_NoValu, but takes a 32-bit x and a FfxFloat32x2 coordinate.
+{ // NOTE(review): x is used as-is (no division by 2 as in SampleLanczos2Weight) - confirm callers pre-scale it.
+#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
+ return FFX_MIN16_F(r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x, 0.5), 0)); // sample at (x, 0.5), mip 0
+#else
+ return 0.0; // fallback when FSR2_BIND_SRV_LANCZOS_LUT is not defined
+#endif
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
{
// Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range.
@@ -728,50 +824,50 @@ FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
}
#endif
-#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
{
return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0);
}
#endif
-#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos)
{
return r_dilated_reactive_masks[iPxPos];
}
#endif
-#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS)
void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks)
{
rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks;
}
#endif
-#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
return r_input_opaque_only[iPxPos].xyz;
}
#endif
-#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
return r_input_prev_color_pre_alpha[iPxPos];
}
#endif
-#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
{
return r_input_prev_color_post_alpha[iPxPos];
}
#endif
-#if defined(FSR2_BIND_UAV_AUTOREACTIVE) || defined(FFX_INTERNAL)
-#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_AUTOREACTIVE)
+#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION)
void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive)
{
rw_output_autoreactive[iPxPos] = fReactive.x;
@@ -781,7 +877,7 @@ void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FF
#endif
#endif
-#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR)
void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
rw_output_prev_color_pre_alpha[iPxPos] = color;
@@ -789,11 +885,76 @@ void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FF
}
#endif
-#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
+#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR)
void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
{
rw_output_prev_color_post_alpha[iPxPos] = color;
}
#endif
+FfxFloat32x2 SPD_LoadExposureBuffer() // Returns texel (0, 0) of rw_auto_exposure.
+{
+#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+ return rw_auto_exposure[FfxInt32x2(0, 0)];
+#else
+ return FfxFloat32x2(0.f, 0.f); // fallback when FSR2_BIND_UAV_AUTO_EXPOSURE is not defined
+#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+}
+
+void SPD_SetExposureBuffer(FfxFloat32x2 value) // Stores value into texel (0, 0) of rw_auto_exposure.
+{ // The body is empty (value is discarded) when FSR2_BIND_UAV_AUTO_EXPOSURE is not defined.
+#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+ rw_auto_exposure[FfxInt32x2(0, 0)] = value;
+#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+}
+
+FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos) // Reads rw_img_mip_5 at iPxPos and widens it to a 4-component vector.
+{
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ return FfxFloat32x4(rw_img_mip_5[iPxPos], 0, 0, 0); // loaded value goes in .x; y, z and w are zero
+#else
+ return FfxFloat32x4(0.f, 0.f, 0.f, 0.f); // fallback when FSR2_BIND_UAV_EXPOSURE_MIP_5 is not defined
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+}
+
+void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxUInt32 slice, FfxFloat32 value) // Routes value to the UAV selected by slice; only two slice values store it.
+{
+ switch (slice)
+ {
+ case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL: // this slice is stored in rw_img_mip_shading_change
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+ rw_img_mip_shading_change[iPxPos] = value;
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+ break;
+ case 5: // slice 5 is stored in rw_img_mip_5
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ rw_img_mip_5[iPxPos] = value;
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ break;
+ default:
+ // Any other slice discards value: the texel is only rewritten with its own current contents.
+ // avoid flattened side effect
+#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
+ rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos];
+#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
+ rw_img_mip_5[iPxPos] = rw_img_mip_5[iPxPos];
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ break;
+ }
+}
+
+void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) // Atomically adds 1 to texel (0, 0) of rw_spd_global_atomic.
+{ // spdCounter is left unchanged when FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC is not defined.
+#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+ InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0, 0)], 1, spdCounter); // third argument receives the value held before the add (HLSL original_value output)
+#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+}
+
+void SPD_ResetAtomicCounter() // Writes 0 to texel (0, 0) of rw_spd_global_atomic.
+{ // No-op when FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC is not defined.
+#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+ rw_spd_global_atomic[FfxInt32x2(0, 0)] = 0;
+#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+}
+
#endif // #if defined(FFX_GPU)
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_common.h
similarity index 94%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_common.h
index 0c72aa84943e..22d7f48ce467 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_common.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -508,6 +509,32 @@ FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure)
return fRgb;
}
+#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1)
+
+void PrepareRgbPaired(inout FFX_MIN16_F2 r, inout FFX_MIN16_F2 g, inout FFX_MIN16_F2 b, FfxFloat32 fExposure, FfxFloat32 fPreExposure)
+{ // Two-lane variant: r, g and b are FFX_MIN16_F2 values modified in place.
+ FFX_MIN16_F ExposureOverPreExposureOver = FFX_MIN16_F(fExposure / fPreExposure);
+ // Scale every channel by fExposure / fPreExposure (ratio formed in 32-bit, then converted to FFX_MIN16_F).
+ r *= ExposureOverPreExposureOver;
+ g *= ExposureOverPreExposureOver;
+ b *= ExposureOverPreExposureOver;
+ // Clamp the scaled channels to [0, FSR2_FP16_MAX].
+ r = ffxClampHalf(r, 0.0, FSR2_FP16_MAX);
+ g = ffxClampHalf(g, 0.0, FSR2_FP16_MAX);
+ b = ffxClampHalf(b, 0.0, FSR2_FP16_MAX);
+}
+
+void UnprepareRgbPaired(inout FFX_MIN16_F2 r, inout FFX_MIN16_F2 g, inout FFX_MIN16_F2 b, FfxFloat32 fExposure)
+{ // Pre-exposure is read via PreExposure() rather than passed in, unlike PrepareRgbPaired.
+ FFX_MIN16_F PreExposureOverExposure = FFX_MIN16_F(PreExposure() / fExposure);
+ // Multiply by PreExposure() / fExposure, the reciprocal of the PrepareRgbPaired factor; no clamp is applied here.
+ r *= PreExposureOverExposure;
+ g *= PreExposureOverExposure;
+ b *= PreExposureOverExposure;
+}
+
+#endif
+
struct BilinearSamplingData
{
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_compute_luminance_pyramid.h
similarity index 84%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_compute_luminance_pyramid.h
index c63f1820e08d..d8d482018ef4 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_compute_luminance_pyramid.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -21,6 +22,21 @@
FFX_GROUPSHARED FfxUInt32 spdCounter;
+void SpdIncreaseAtomicCounter(FfxUInt32 slice) // slice is not used; forwards to SPD_IncreaseAtomicCounter.
+{
+ SPD_IncreaseAtomicCounter(spdCounter); // updates the FFX_GROUPSHARED spdCounter declared earlier in this file
+}
+
+FfxUInt32 SpdGetAtomicCounter() // Returns the current value of the FFX_GROUPSHARED spdCounter.
+{
+ return spdCounter;
+}
+
+void SpdResetAtomicCounter(FfxUInt32 slice) // slice is not used; forwards to SPD_ResetAtomicCounter.
+{
+ SPD_ResetAtomicCounter(); // spdCounter itself is not modified here
+}
+
#ifndef SPD_PACKED_ONLY
FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];
FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];
@@ -74,21 +90,6 @@ void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32
}
}
-void SpdIncreaseAtomicCounter(FfxUInt32 slice)
-{
- SPD_IncreaseAtomicCounter(spdCounter);
-}
-
-FfxUInt32 SpdGetAtomicCounter()
-{
- return spdCounter;
-}
-
-void SpdResetAtomicCounter(FfxUInt32 slice)
-{
- SPD_ResetAtomicCounter();
-}
-
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
{
return FfxFloat32x4(
@@ -112,40 +113,24 @@ FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFl
// define fetch and store functions Packed
#if FFX_HALF
-#error Callback must be implemented
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16];
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16];
FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice)
{
- return FfxFloat16x4(imgDst[0][FfxFloat32x3(tex, slice)]);
+ return FfxFloat16x4(0, 0, 0, 0);
}
+
FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice)
{
- return FfxFloat16x4(imgDst6[FfxUInt32x3(p, slice)]);
+ return FfxFloat16x4(0, 0, 0, 0);
}
+
void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice)
{
- if (index == LumaMipLevelToUse() || index == 5)
- {
- imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
- return;
- }
- imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
-}
-void SpdIncreaseAtomicCounter(FfxUInt32 slice)
-{
- InterlockedAdd(rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice], 1, spdCounter);
-}
-FfxUInt32 SpdGetAtomicCounter()
-{
- return spdCounter;
-}
-void SpdResetAtomicCounter(FfxUInt32 slice)
-{
- rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice] = 0;
}
+
FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
{
return FfxFloat16x4(
@@ -154,18 +139,20 @@ FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
spdIntermediateBA[x][y].x,
spdIntermediateBA[x][y].y);
}
+
void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)
{
spdIntermediateRG[x][y] = value.xy;
spdIntermediateBA[x][y] = value.zw;
}
+
FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
{
return (v0 + v1 + v2 + v3) * FfxFloat16(0.25);
}
#endif
-#include "ffx_spd.h"
+#include "../spd/ffx_spd.h"
void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)
{
@@ -186,4 +173,4 @@ void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)
FfxUInt32(WorkGroupId.z),
FfxUInt32x2(WorkGroupOffset()));
#endif
-}
\ No newline at end of file
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_depth_clip.h
similarity index 97%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_depth_clip.h
index fa4c975a23fd..873ff4b72d77 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_depth_clip.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -255,4 +256,4 @@ void DepthClip(FfxInt32x2 iPxPos)
PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence));
}
-#endif //!defined( FFX_FSR2_DEPTH_CLIPH )
\ No newline at end of file
+#endif //!defined( FFX_FSR2_DEPTH_CLIPH )
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_lock.h
similarity index 93%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_lock.h
index 8347fa86bcdf..61030f6bb84c 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_lock.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_postprocess_lock_status.h
similarity index 95%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_postprocess_lock_status.h
index cee9e148ba16..90ef34423eeb 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_postprocess_lock_status.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
diff --git a/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_rcas.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_rcas.h
new file mode 100644
index 000000000000..f4c976bbec0e
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_rcas.h
@@ -0,0 +1,121 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#define GROUP_SIZE 8
+#define FSR_RCAS_DENOISE 1
+
+#include "../ffx_core.h"
+
+#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1)
+ #define FSR_RCAS_PREFER_PAIRED_VERSION 1
+#else
+ #define FSR_RCAS_PREFER_PAIRED_VERSION 0
+#endif
+
+void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) // Thin wrapper around StoreUpscaledOutput.
+{
+ StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); // position is converted from FFX_MIN16_U2 to FFX_MIN16_I2 for the store
+}
+
+#if FSR_RCAS_PREFER_PAIRED_VERSION
+
+ #define FSR_RCAS_HX2 1
+
+ FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p) // Load callback for the paired path: fetches LoadRCAS_Input(p) as FfxFloat16x4.
+ {
+ return FfxFloat16x4(LoadRCAS_Input(p)); // no exposure scaling at load time on this path
+ }
+ void FsrRcasInputHx2(inout FfxFloat16x2 r, inout FfxFloat16x2 g, inout FfxFloat16x2 b) // Input callback for the paired path.
+ {
+ PrepareRgbPaired(r, g, b, Exposure(), PreExposure()); // scales r, g, b in place using Exposure() and PreExposure()
+ }
+
+#else
+
+ #define FSR_RCAS_F 1
+ FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) // Load callback for the 32-bit path: fetches LoadRCAS_Input(p) and prepares its rgb.
+ {
+ FfxFloat32x4 fColor = LoadRCAS_Input(p);
+ // Only rgb goes through PrepareRgb; the alpha channel is returned as loaded.
+ fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure());
+ // Return the prepared colour.
+ return fColor;
+ }
+ void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} // No-op on this path: PrepareRgb is already applied at load time in FsrRcasLoadF.
+
+#endif // #if FSR_RCAS_PREFER_PAIRED_VERSION
+
+// GODOT BEGINS
+// Workaround for Godot GLSL processor not supporting conditional include.
+// Thus we have to take the include statement out of the conditional block.
+
+#include "../fsr1/ffx_fsr1.h"
+
+#if FSR_RCAS_PREFER_PAIRED_VERSION
+
+void CurrFilterPaired(FFX_MIN16_U2 pos) // Filters two pixels per call and writes them at pos and at pos.x + 8.
+{
+ FfxFloat16x2 cr;
+ FfxFloat16x2 cg;
+ FfxFloat16x2 cb;
+ FsrRcasHx2(cr, cg, cb, pos, RCASConfig());
+ // Remove the exposure scaling again; presumably the inverse of the PrepareRgbPaired call in FsrRcasInputHx2 - confirm in ffx_fsr1.h.
+ UnprepareRgbPaired(cr, cg, cb, Exposure());
+ // The .x lanes are written at pos, the .y lanes 8 columns to the right; WriteUpscaledOutput takes FfxFloat32x3, hence the TODOs.
+ WriteUpscaledOutput(pos, FfxFloat16x3(cr.x, cg.x, cb.x)); //TODO: fix type
+ pos.x += 8;
+ WriteUpscaledOutput(pos, FfxFloat16x3(cr.y, cg.y, cb.y)); //TODO: fix type
+}
+
+#else
+
+void CurrFilter(FFX_MIN16_U2 pos) // Filters one pixel at pos and writes the result to the upscaled output.
+{
+ FfxFloat32x3 c;
+ FsrRcasF(c.r, c.g, c.b, pos, RCASConfig());
+ // Undo the exposure scaling that FsrRcasLoadF applied via PrepareRgb.
+ c = UnprepareRgb(c, Exposure());
+ // Store the final colour at the same position.
+ WriteUpscaledOutput(pos, c);
+}
+
+#endif // #if FSR_RCAS_PREFER_PAIRED_VERSION
+// GODOT ENDS
+
+void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid)
+{ // Pass entry point; Dtid is not used in this body.
+ // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+ FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); // workgroup origin is WorkGroupId.xy * 16
+#if FSR_RCAS_PREFER_PAIRED_VERSION
+ CurrFilterPaired(FFX_MIN16_U2(gxy)); // each paired call also writes the pixel at x + 8
+ gxy.y += 8u;
+ CurrFilterPaired(FFX_MIN16_U2(gxy)); // second row, 8 pixels down
+#else
+ CurrFilter(FFX_MIN16_U2(gxy)); // offset (0, 0)
+ gxy.x += 8u;
+ CurrFilter(FFX_MIN16_U2(gxy)); // offset (8, 0)
+ gxy.y += 8u;
+ CurrFilter(FFX_MIN16_U2(gxy)); // offset (8, 8)
+ gxy.x -= 8u;
+ CurrFilter(FFX_MIN16_U2(gxy)); // offset (0, 8)
+#endif
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
similarity index 95%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
index e9ccc4bc8c28..1a4305d772a2 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reproject.h
similarity index 95%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reproject.h
index f7f396129e14..386b29771b9c 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reproject.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_resources.h
similarity index 97%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_resources.h
index 535dbc383c7b..a597c5a5aed9 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_resources.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_sample.h
similarity index 91%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_sample.h
index f94f40aa793c..cd7142af53eb 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_sample.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -138,14 +139,6 @@ FfxFloat32 Lanczos2(FfxFloat32 x)
#if FFX_HALF
-#if 0
-FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
-{
- const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants
- return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x));
-}
-#endif
-
FFX_MIN16_F Lanczos2(FFX_MIN16_F x)
{
x = ffxMin(abs(x), FFX_MIN16_F(2.0f));
@@ -168,6 +161,26 @@ FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2)
FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1);
return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b);
}
+
+#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1)
+
+FFX_MIN16_F2 PairedLanczos2ApproxSqNoClamp(FFX_MIN16_F2 x2)
+{
+ // Xbox ATG (Pavel): paired (two-lane) form of the Lanczos2ApproxSqNoClamp expression, simplified algebraically.
+ //
+ // With a = (2.0 * x2 - 5.0) / 5.0 and b = 0.25 * x2 - 1.0:
+ // (25.0 / 16.0) * a^2 - (25.0 - 16.0) / 16.0 = ((2.0 * x2 - 5.0)^2 - (3.0)^2) / 16.0
+ // = (2.0 * x2 - 8.0) * (2.0 * x2 - 2.0) / 16.0 = (x2 - 4.0) * (x2 - 1.0) / 4.0 = b * (x2 - 1.0)
+ //
+ // so we need to compute just (b * b) * (b * x2 - b), so we should get four packed instructions: 2 fma + 2 mul
+ //
+
+ FFX_MIN16_F2 b = (0.25 * x2 - 1.0);
+ return (b * b) * (b * x2 - b);
+}
+
+#endif
+
#endif //FFX_HALF
FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
@@ -182,6 +195,15 @@ FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2)
x2 = ffxMin(x2, FFX_MIN16_F(4.0f));
return Lanczos2ApproxSqNoClamp(x2);
}
+
+#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1)
+FFX_MIN16_F2 PairedLanczos2ApproxSq(FFX_MIN16_F2 x2) // x2: squared sample distances, one per lane
+{
+ x2 = ffxMin(x2, FFX_MIN16_F2(4.0, 4.0)); // clamp each lane to 4.0, where the approximation's b term (0.25 * x2 - 1.0) reaches zero
+ return PairedLanczos2ApproxSqNoClamp(x2);
+}
+#endif
+
#endif //FFX_HALF
FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
@@ -218,6 +240,21 @@ FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x)
{
return FFX_MIN16_F(SampleLanczos2Weight(abs(x)));
}
+
+#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1)
+
+FFX_MIN16_F Lanczos2_UseLUTNoAbs(FFX_MIN16_F x) // LUT-based Lanczos2 weight; unlike Lanczos2_UseLUT, x is forwarded without abs()
+{
+ return SampleLanczos2Weight_NoValu(x); // presumably the caller must pass a non-negative x; verify against SampleLanczos2Weight_NoValu
+}
+
+FFX_MIN16_F Lanczos2_UseLUTNoAbsNoA16(FfxFloat32 x) // same as Lanczos2_UseLUTNoAbs, but takes a 32-bit x
+{
+ return SampleLanczos2Weight_NoValuNoA16(x); // x is forwarded without abs(), as in Lanczos2_UseLUTNoAbs
+}
+
+#endif
+
#endif //FFX_HALF
FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
@@ -363,6 +400,19 @@ FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
}
#if FFX_HALF
+
+#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1)
+FFX_MIN16_F4 Lanczos2ApplyWeightX(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F2 fWeight0, FFX_MIN16_F2 fWeight1, FFX_MIN16_F2 fWeight2, FFX_MIN16_F2 fWeight3, FFX_MIN16_F2 fWeightSumInverted)
+{
+ return (((fWeight0.x * fColor0) + fWeight1.x * fColor1) + ((fWeight2.x * fColor2) + fWeight3.x * fColor3)) * fWeightSumInverted.x; // weighted sum of the four colors using only the .x lane of each paired weight, then scaled by fWeightSumInverted.x
+}
+
+FFX_MIN16_F4 Lanczos2ApplyWeightY(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F2 fWeight0, FFX_MIN16_F2 fWeight1, FFX_MIN16_F2 fWeight2, FFX_MIN16_F2 fWeight3, FFX_MIN16_F2 fWeightSumInverted)
+{
+ return (((fWeight0.y * fColor0) + fWeight1.y * fColor1) + ((fWeight2.y * fColor2) + fWeight3.y * fColor3)) * fWeightSumInverted.y; // weighted sum of the four colors using only the .y lane of each paired weight, then scaled by fWeightSumInverted.y
+}
+#endif
+
FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_tcr_autogen.h
similarity index 97%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_tcr_autogen.h
index 101b75d25e4d..10970eff3ffb 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_tcr_autogen.h
@@ -1,13 +1,14 @@
// This file is part of the FidelityFX SDK.
//
-// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
+// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
+// furnished to do so, subject to the following conditions :
+//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
@@ -52,7 +53,7 @@ FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevI
FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(abs(Y - X) - abs(W - Z)), FfxFloat32x3(1, 1, 1))));
// cleanup very small values
- retVal = (retVal < getTcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f);
+ retVal = (retVal < TcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f);
return retVal;
}
diff --git a/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_upsample.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_upsample.h
new file mode 100644
index 000000000000..2281d986dbd3
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_upsample.h
@@ -0,0 +1,460 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR2_UPSAMPLE_H
+#define FFX_FSR2_UPSAMPLE_H
+
+FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; // size of the fSamples array in ComputeUpsampledColorAndWeight, which is indexed as col + (row << 2)
+
+void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) // fColor is modified in place
+{
+ fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); // restrict the color to the clipping box's [aabbMin, aabbMax] range
+}
+#if FFX_HALF
+void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) // min16 overload; fColor is modified in place
+{
+ fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); // restrict the color to the clipping box's [aabbMin, aabbMax] range
+}
+#endif
+
+#ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE
+#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate
+#endif
+
+FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight) // Lanczos2 weight of a sample at fSrcSampleOffset
+{
+ FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; // scale the offset by the kernel weight before evaluating the kernel
+#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
+ FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased));
+#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
+ FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased));
+#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
+ FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); // takes the squared length, so no sqrt is needed
+#else
+#error "Invalid Lanczos type"
+#endif
+ return fSampleWeight;
+}
+
+#if FFX_HALF
+FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight) // min16 overload of the Lanczos2 sample weight
+{
+ FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; // scale the offset by the kernel weight before evaluating the kernel
+#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
+ FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased));
+#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
+ FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased));
+#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
+ FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
+
+ // To test: save the reciprocal sqrt computation by using a LUT indexed by the squared offset
+ // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
+#else
+#error "Invalid Lanczos type"
+#endif
+ return fSampleWeight;
+}
+#endif
+
+FfxFloat32 ComputeMaxKernelWeight() { // upper bound for the kernel bias used by ComputeUpsampledColorAndWeight
+ const FfxFloat32 fKernelSizeBias = 1.0f;
+
+ FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias); // 1 + (1 / DownscaleFactor().x - 1) * bias; only the x component is used
+
+ return ffxMin(FfxFloat32(1.99f), fKernelWeight); // never exceed 1.99
+}
+
+
+#if FFX_HALF && (FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2) && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1)
+#define FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 1
+#else // any other platform, precision mode or Lanczos type uses the generic loop-based path
+#define FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 0
+#endif // paired 16-bit path selection
+
+#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS
+
+FFX_MIN16_F2 Bool2ToFloat16x2(bool x, bool y) // converts two booleans into a pair of 16-bit floats (1.0 for true, 0.0 for false)
+{
+ uint lo = x ? 0x00003c00 : 0x00000000; // 0x3c00 is the IEEE 754 half-precision bit pattern of 1.0, placed in the low 16 bits
+ uint hi = y ? 0x3c000000 : 0x00000000; // the same bit pattern placed in the high 16 bits
+
+ return FFX_MIN16_F2(__XB_AsHalf(lo).x, __XB_AsHalf(hi).y); // presumably __XB_AsHalf reinterprets the 32 bits as two halves (.x = low, .y = high) — confirm against the Xbox intrinsics
+}
+
+struct PairedRectificationBoxAndAccumulatedColorAndWeight // accumulates the rectification box and the weighted upsampled color in packed 16-bit pairs
+{
+ FFX_MIN16_F2 aabbMinRG; // running minimum of (r, g) over all samples
+ FFX_MIN16_F2 aabbMinB; // running minimum of b, one lane per sample of a pair; finalize() takes the min of both lanes
+
+ FFX_MIN16_F2 aabbMaxRG; // running maximum of (r, g) over all samples
+ FFX_MIN16_F2 aabbMaxB; // running maximum of b, one lane per sample of a pair; finalize() takes the max of both lanes
+
+ FFX_MIN16_F2 boxCenterRG; // box-weighted sum of (r, g)
+ FFX_MIN16_F2 boxCenterB; // box-weighted sum of b per lane; finalize() adds both lanes
+
+ FFX_MIN16_F2 boxVecRG; // box-weighted sum of squared (r, g)
+ FFX_MIN16_F2 boxVecB; // box-weighted sum of squared b per lane; finalize() adds both lanes
+
+ FFX_MIN16_F2 fBoxCenterWeight; // sum of box sample weights per lane; finalize() adds both lanes
+
+ FFX_MIN16_F2 fColorRG; // Lanczos-weighted sum of (r, g)
+ FFX_MIN16_F2 fColorB; // Lanczos-weighted sum of b per lane; finalize() adds both lanes
+ FFX_MIN16_F2 fWeight; // sum of Lanczos sample weights per lane; finalize() adds both lanes
+
+ FFX_MIN16_F fKernelBiasSq; // kernel bias squared, multiplied onto squared sample offsets
+ FfxFloat32 fRectificationCurveBias; // scale of the exponent in the box sample weight exp(bias * offsetSq)
+
+ void setKernelBiasAndRectificationCurveBias(FfxFloat32 kernelBias, FfxFloat32 rectificationCurveBias)
+ {
+ fKernelBiasSq = FFX_MIN16_F(kernelBias * kernelBias); // stored squared because the paired path works on squared offsets
+ fRectificationCurveBias = rectificationCurveBias;
+ }
+
+ void init(FFX_MIN16_F fSrcSampleOffsetSq, bool sampleOnScreenX, bool sampleOnScreenY, FFX_MIN16_F3 colorSample) // overwrites all accumulators with a single first sample
+ {
+ // NOTE: make sure exp has 32-bit precision
+ const FFX_MIN16_F fBoxSampleWeight = FFX_MIN16_F(
+ exp(fRectificationCurveBias * FfxFloat32(fSrcSampleOffsetSq))
+ );
+
+#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
+ const FFX_MIN16_F2 LanczosUpsampleWeight = PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq);
+#else
+#error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far"
+#endif
+ const FFX_MIN16_F2 fSampleWeight = FFX_MIN16_F2((sampleOnScreenX && sampleOnScreenY ? 1.0 : 0.0), 0.0) * LanczosUpsampleWeight; // second lane is zero because init() handles a single sample
+
+ aabbMinRG = colorSample.rg;
+ aabbMinB = colorSample.bb;
+
+ aabbMaxRG = colorSample.rg;
+ aabbMaxB = colorSample.bb;
+
+ boxCenterRG = colorSample.rg * fBoxSampleWeight.x;
+ boxCenterB = colorSample.bb * fBoxSampleWeight; // NOTE(review): fBoxSampleWeight is a scalar here, so both lanes of boxCenterB, boxVecB and fBoxCenterWeight appear to receive this sample, and finalize() adds the lanes — confirm this is intended
+
+ boxVecRG = colorSample.rg * boxCenterRG;
+ boxVecB = colorSample.bb * boxCenterB;
+
+ fBoxCenterWeight = fBoxSampleWeight;
+
+ fColorRG = colorSample.rg * fSampleWeight.x;
+ fColorB = colorSample.bb * fSampleWeight; // only lane x contributes, since fSampleWeight.y is zero
+ fWeight = fSampleWeight;
+ }
+
+ void addSample(FFX_MIN16_F2 fSrcSampleOffsetSq, bool sample0OnScreen, bool sample1OnScreen, bool sample01OnScreen, FFX_MIN16_F3 ColorSample0, FFX_MIN16_F3 ColorSample1) // accumulates two samples at once; lane x belongs to sample 0, lane y to sample 1
+ {
+ // NOTE: make sure exp has 32-bit precision
+ const FFX_MIN16_F2 fBoxSampleWeight = FFX_MIN16_F2(
+ exp(fRectificationCurveBias * FfxFloat32(fSrcSampleOffsetSq.x)),
+ exp(fRectificationCurveBias * FfxFloat32(fSrcSampleOffsetSq.y))
+ );
+
+#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
+ const FFX_MIN16_F2 LanczosUpsampleWeight = PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq);
+#else
+#error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far"
+#endif
+ const FFX_MIN16_F2 fSampleWeight = Bool2ToFloat16x2(sample0OnScreen && sample01OnScreen, sample1OnScreen && sample01OnScreen) * LanczosUpsampleWeight; // sample01OnScreen is the on-screen flag of the coordinate both samples share
+
+ FFX_MIN16_F2 colorSampleB = FFX_MIN16_F2(ColorSample0.b, ColorSample1.b); // pack the blue channel of both samples into one pair
+
+ aabbMinRG = ffxMin(aabbMinRG, ColorSample0.rg);
+ aabbMinRG = ffxMin(aabbMinRG, ColorSample1.rg);
+ aabbMinB = ffxMin(aabbMinB, colorSampleB);
+
+ aabbMaxRG = ffxMax(aabbMaxRG, ColorSample0.rg);
+ aabbMaxRG = ffxMax(aabbMaxRG, ColorSample1.rg);
+ aabbMaxB = ffxMax(aabbMaxB, colorSampleB);
+
+ FFX_MIN16_F2 weightedColorSampleRG0 = ColorSample0.rg * fBoxSampleWeight.x;
+ FFX_MIN16_F2 weightedColorSampleRG1 = ColorSample1.rg * fBoxSampleWeight.y;
+ FFX_MIN16_F2 weightedColorSampleB = colorSampleB * fBoxSampleWeight;
+
+ boxCenterRG += weightedColorSampleRG0;
+ boxCenterRG += weightedColorSampleRG1;
+ boxCenterB += weightedColorSampleB;
+
+ boxVecRG += ColorSample0.rg * weightedColorSampleRG0; // color squared times box weight
+ boxVecRG += ColorSample1.rg * weightedColorSampleRG1;
+ boxVecB += colorSampleB * weightedColorSampleB;
+
+ fBoxCenterWeight += fBoxSampleWeight;
+
+ fWeight += fSampleWeight;
+ fColorRG += (ColorSample0.rg * fSampleWeight.x) + (ColorSample1.rg * fSampleWeight.y);
+ fColorB += colorSampleB * fSampleWeight;
+ }
+
+ void finalize(FFX_PARAMETER_INOUT RectificationBox rectificationBox, FFX_PARAMETER_INOUT FfxFloat32x4 outColorAndWeight) // merges the per-lane accumulators and writes 32-bit results to both outputs
+ {
+ rectificationBox.aabbMin.r = FfxFloat32(aabbMinRG.x);
+ rectificationBox.aabbMin.g = FfxFloat32(aabbMinRG.y);
+ rectificationBox.aabbMin.b = FfxFloat32(ffxMin(aabbMinB.x, aabbMinB.y)); // merge the two blue lanes
+
+ rectificationBox.aabbMax.r = FfxFloat32(aabbMaxRG.x);
+ rectificationBox.aabbMax.g = FfxFloat32(aabbMaxRG.y);
+ rectificationBox.aabbMax.b = FfxFloat32(ffxMax(aabbMaxB.x, aabbMaxB.y)); // merge the two blue lanes
+
+ rectificationBox.boxCenter.r = FfxFloat32(boxCenterRG.x);
+ rectificationBox.boxCenter.g = FfxFloat32(boxCenterRG.y);
+ rectificationBox.boxCenter.b = FfxFloat32(boxCenterB.x + boxCenterB.y);
+
+ rectificationBox.boxVec.r = FfxFloat32(boxVecRG.x);
+ rectificationBox.boxVec.g = FfxFloat32(boxVecRG.y);
+ rectificationBox.boxVec.b = FfxFloat32(boxVecB.x + boxVecB.y);
+
+ rectificationBox.fBoxCenterWeight = FfxFloat32(fBoxCenterWeight.x + fBoxCenterWeight.y);
+
+ outColorAndWeight = FfxFloat32x4(fColorRG, fColorB.x + fColorB.y, fWeight.x + fWeight.y); // xyz = weighted color sum, w = total sample weight
+ }
+};
+#endif
+
+FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params,
+ FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor)
+{
+ // We compute a sliced Lanczos filter with 2 lobes (other slices are accumulated temporally)
+ FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position
+ FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position
+ FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors...
+
+#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS
+ FFX_MIN16_F3 fSamples[iLanczos2SampleCount];
+#else
+ FfxFloat32x3 fSamples[iLanczos2SampleCount];
+#endif
+
+ FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0
+
+ FfxInt32x2 offsetTL;
+ offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1);
+ offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1);
+
+ //Load samples
+ // If fSrcUnjitteredPos.y > fSrcOutputPos.y then offsetTL.y = -2, so the sample offsets in Y are [-2, 1] and the clip box would use rows [1, 3].
+ // In that case the row order is flipped while sampling, so that rows 0~2 of the sampled array can always be used for computing the clip box.
+ // This avoids branches or cmoves on the sampled colors and moves that overhead to the sample position / weight calculation, which applies to fewer values.
+ const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y;
+ const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x;
+
+#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS
+ // Unroll the loop to load samples on Scarlett to help the shader compiler
+ const FFX_MIN16_F2 fSampleOffsetX02 = __XB_AsHalf(bFlipCol ? __XB_AsUInt(FFX_MIN16_F2( 1, -1)) : __XB_AsUInt(FFX_MIN16_F2(-1, 1)));
+ const FFX_MIN16_F2 fSampleOffsetY02 = __XB_AsHalf(bFlipRow ? __XB_AsUInt(FFX_MIN16_F2( 1, -1)) : __XB_AsUInt(FFX_MIN16_F2(-1, 1)));
+
+ typedef FfxInt32 FfxTexCoordI;
+ typedef FfxInt32x2 FfxTexCoordI2;
+
+ const FfxTexCoordI2 iSrcSamplePosX01 = FfxTexCoordI2(iSrcInputPos.xx) + (bFlipCol ? FfxTexCoordI2( 1, 0) : FfxTexCoordI2(-1, 0));
+ const FfxTexCoordI2 iSrcSamplePosX23 = FfxTexCoordI2(iSrcInputPos.xx) + (bFlipCol ? FfxTexCoordI2(-1, -2) : FfxTexCoordI2( 1, 2));
+
+ const FfxTexCoordI2 iSrcSamplePosY01 = FfxTexCoordI2(iSrcInputPos.yy) + (bFlipRow ? FfxTexCoordI2( 1, 0) : FfxTexCoordI2(-1, 0));
+ const FfxTexCoordI2 iSrcSamplePosY23 = FfxTexCoordI2(iSrcInputPos.yy) + (bFlipRow ? FfxTexCoordI2(-1, -2) : FfxTexCoordI2( 1, 2));
+
+ const FfxTexCoordI2 renderSizeLastTexelCoord = FfxTexCoordI2(RenderSize()) - FfxTexCoordI2(1, 1);
+
+ const FfxTexCoordI2 iSrcSamplePosX01Clamped = FfxTexCoordI2(
+ __XB_Med3_I32(iSrcSamplePosX01.x, 0, renderSizeLastTexelCoord.x),
+ __XB_Med3_I32(iSrcSamplePosX01.y, 0, renderSizeLastTexelCoord.x)
+ );
+
+ const FfxTexCoordI2 iSrcSamplePosX23Clamped = FfxTexCoordI2(
+ __XB_Med3_I32(iSrcSamplePosX23.x, 0, renderSizeLastTexelCoord.x),
+ __XB_Med3_I32(iSrcSamplePosX23.y, 0, renderSizeLastTexelCoord.x)
+ );
+
+ const FfxTexCoordI2 iSrcSamplePosY01Clamped = FfxTexCoordI2(
+ __XB_Med3_I32(iSrcSamplePosY01.x, 0, renderSizeLastTexelCoord.y),
+ __XB_Med3_I32(iSrcSamplePosY01.y, 0, renderSizeLastTexelCoord.y)
+ );
+
+ const FfxTexCoordI2 iSrcSamplePosY23Clamped = FfxTexCoordI2(
+ __XB_Med3_I32(iSrcSamplePosY23.x, 0, renderSizeLastTexelCoord.y),
+ __XB_Med3_I32(iSrcSamplePosY23.y, 0, renderSizeLastTexelCoord.y)
+ );
+
+ fSamples[ 0] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY01Clamped.x));
+ fSamples[ 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.x));
+ fSamples[ 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY01Clamped.x));
+
+ fSamples[4 + 0] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY01Clamped.y));
+ fSamples[4 + 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.y));
+ fSamples[4 + 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY01Clamped.y));
+
+ fSamples[8 + 0] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY23Clamped.x));
+ fSamples[8 + 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY23Clamped.x));
+ fSamples[8 + 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY23Clamped.x));
+
+ fSamples[12 + 0] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY23Clamped.y)); // NOTE(review): fSamples[12..14] are loaded here but not referenced by the paired accumulation below — confirm they are needed
+ fSamples[12 + 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY23Clamped.y));
+ fSamples[12 + 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY23Clamped.y));
+
+#else
+ FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL);
+
+ FFX_UNROLL
+ for (FfxInt32 row = 0; row < 3; row++) {
+
+ FFX_UNROLL
+ for (FfxInt32 col = 0; col < 3; col++) {
+ FfxInt32 iSampleIndex = col + (row << 2); // samples are stored with a row stride of 4
+
+ FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
+ FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow;
+
+ const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize()));
+
+ fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord));
+ }
+ }
+#endif
+
+ FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos);
+
+ // Identify how much of each upsampled color to be used for this frame
+ const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample));
+ const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor);
+
+ const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f));
+ const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor));
+ const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor);
+
+ const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f));
+
+#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS
+ // Unrolled accumulation of the loaded samples on Scarlett: one init() call for the center, then two samples per addSample() call
+ const bool coordX0OnScreen = iSrcSamplePosX01.x == iSrcSamplePosX01Clamped.x;
+ const bool coordX1OnScreen = iSrcSamplePosX01.y == iSrcSamplePosX01Clamped.y;
+ const bool coordX2OnScreen = iSrcSamplePosX23.x == iSrcSamplePosX23Clamped.x;
+
+ const bool coordY0OnScreen = iSrcSamplePosY01.x == iSrcSamplePosY01Clamped.x;
+ const bool coordY1OnScreen = iSrcSamplePosY01.y == iSrcSamplePosY01Clamped.y;
+ const bool coordY2OnScreen = iSrcSamplePosY23.x == iSrcSamplePosY23Clamped.x;
+
+ const FFX_MIN16_F2 fBaseSampleOffsetHalf = FFX_MIN16_F2(fBaseSampleOffset);
+
+ const FFX_MIN16_F2 fSrcSampleOffsetX_02 = fBaseSampleOffsetHalf.xx + fSampleOffsetX02;
+ const FFX_MIN16_F2 fSrcSampleOffsetY_02 = fBaseSampleOffsetHalf.yy + fSampleOffsetY02;
+
+ const FFX_MIN16_F2 fSrcSampleOffsetXSq_02 = fSrcSampleOffsetX_02 * fSrcSampleOffsetX_02;
+ const FFX_MIN16_F2 fSrcSampleOffsetYSq_02 = fSrcSampleOffsetY_02 * fSrcSampleOffsetY_02;
+ const FFX_MIN16_F2 fSrcSampleOffsetXYSq_11 = fBaseSampleOffsetHalf * fBaseSampleOffsetHalf;
+
+ PairedRectificationBoxAndAccumulatedColorAndWeight pairedBox;
+ pairedBox.setKernelBiasAndRectificationCurveBias(fKernelBias, fRectificationCurveBias);
+
+ // init by o o o
+ // o x o
+ // o o o
+ pairedBox.init(
+ fSrcSampleOffsetXYSq_11.x + fSrcSampleOffsetXYSq_11.y,
+ coordX1OnScreen, coordY1OnScreen,
+ fSamples[5]
+ );
+
+ // add remaining two samples from 1st row x o x
+ // o * o
+ // o o o
+ pairedBox.addSample(
+ fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.xx,
+ coordX0OnScreen, coordX2OnScreen, coordY0OnScreen,
+ fSamples[0 + 0], fSamples[0 + 2]
+ );
+
+ // add two samples from 3rd row * o *
+ // o * o
+ // x o x
+ pairedBox.addSample(
+ fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.yy,
+ coordX0OnScreen, coordX2OnScreen, coordY2OnScreen,
+ fSamples[8 + 0], fSamples[8 + 2]
+ );
+
+ // add two samples from 2nd row * o *
+ // x * x
+ // * o *
+ pairedBox.addSample(
+ fSrcSampleOffsetXSq_02 + fSrcSampleOffsetXYSq_11.yy,
+ coordX0OnScreen, coordX2OnScreen, coordY1OnScreen,
+ fSamples[4 + 0], fSamples[4 + 2]
+ );
+
+ // add remaining samples (middle column) * x *
+ // * * *
+ // * x *
+ pairedBox.addSample(
+ fSrcSampleOffsetXYSq_11.xx + fSrcSampleOffsetYSq_02,
+ coordY0OnScreen, coordY2OnScreen, coordX1OnScreen,
+ fSamples[0 + 1], fSamples[8 + 1]
+ );
+
+ pairedBox.finalize(clippingBox, fColorAndWeight);
+#else
+ FFX_UNROLL
+ for (FfxInt32 row = 0; row < 3; row++) {
+ FFX_UNROLL
+ for (FfxInt32 col = 0; col < 3; col++) {
+ FfxInt32 iSampleIndex = col + (row << 2);
+
+ const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
+ const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow);
+ FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset;
+
+ FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow;
+
+ const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))); // off-screen samples get zero Lanczos weight but still feed the rectification box below
+ FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias));
+
+ fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight);
+
+ // Update rectification box
+ {
+ const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset);
+ const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq);
+
+ const FfxBoolean bInitialSample = (row == 0) && (col == 0);
+ RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight);
+ }
+ }
+ }
+#endif
+
+ RectificationBoxComputeVarianceBoxData(clippingBox);
+
+ fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR2_EPSILON); // zero out weights that do not exceed FSR2_EPSILON
+
+ if (fColorAndWeight.w > FSR2_EPSILON) {
+ // Normalize for deringing (we need to compare colors)
+ fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w;
+ fColorAndWeight.w *= fUpsampleLanczosWeightScale;
+
+ Deringing(clippingBox, fColorAndWeight.xyz);
+ }
+
+ return fColorAndWeight;
+}
+
+#endif //!defined( FFX_FSR2_UPSAMPLE_H )
diff --git a/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_callbacks_hlsl.h
new file mode 100644
index 000000000000..b8240604f9c3
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_callbacks_hlsl.h
@@ -0,0 +1,24 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "../fsr2/ffx_fsr2_callbacks_hlsl.h"
+#include "ffx_fsr3_resources.h"
diff --git a/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_common.h b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_common.h
new file mode 100644
index 000000000000..f8c935c7eeb8
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_common.h
@@ -0,0 +1,25 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "../fsr2/ffx_fsr2_common.h"
+#include "../frameinterpolation/ffx_frameinterpolation_common.h"
+#include "../opticalflow/ffx_opticalflow_common.h"
diff --git a/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_resources.h b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_resources.h
new file mode 100644
index 000000000000..bb0614a4efbe
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_resources.h
@@ -0,0 +1,51 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR3_RESOURCES_H
+#define FFX_FSR3_RESOURCES_H
+
+#include "../fsr2/ffx_fsr2_resources.h"
+#include "../frameinterpolation/ffx_frameinterpolation_resources.h"
+
+#if defined(FFX_CPU) || defined(FFX_GPU)
+#define FFX_FSR3_RESOURCE_IDENTIFIER_NULL 0
+// Values 3 and 4 are not assigned in this header; FFX_FSR3_RESOURCE_IDENTIFIER_COUNT (17) is one more than the highest value assigned below (16).
+#define FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR 1
+#define FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT 2
+#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_0 5
+#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_0 6
+#define FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_0 7
+#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_1 8
+#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_1 9
+#define FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_1 10
+#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_2 11
+#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_2 12
+#define FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_2 13
+#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_3 14
+#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_3 15
+#define FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_3 16
+
+#define FFX_FSR3_RESOURCE_IDENTIFIER_COUNT 17
+#define FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT 3
+#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
+
+#endif // #ifndef FFX_FSR3_RESOURCES_H
diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_accumulate.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_accumulate.h
new file mode 100644
index 000000000000..a8c6daa0e6cb
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_accumulate.h
@@ -0,0 +1,172 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data)
+{ // Blends data.fUpsampledColor into data.fHistoryColor by weight and leaves the result in data.fHistoryColor after YCoCgToRGB; params is not read in this body.
+ // Avoid invalid values when both the accumulated and the upsampled weight are 0:
+ data.fHistoryWeight *= FfxFloat32(data.fHistoryWeight > FSR3UPSCALER_FP16_MIN); // history weights not above FSR3UPSCALER_FP16_MIN are zeroed
+ data.fHistoryWeight = ffxMax(FSR3UPSCALER_EPSILON, data.fHistoryWeight + data.fUpsampledWeight); // combined weight, floored at FSR3UPSCALER_EPSILON before it is used as a divisor
+
+#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT
+ // YCoCg -> RGB -> Tonemap -> YCoCg (use the RGB tonemapper to avoid color desaturation)
+ data.fUpsampledColor = RGBToYCoCg(Tonemap(YCoCgToRGB(data.fUpsampledColor)));
+ data.fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(data.fHistoryColor)));
+#endif
+
+ const FfxFloat32 fAlpha = ffxSaturate(data.fUpsampledWeight / data.fHistoryWeight); // this frame's share of the combined weight
+ data.fHistoryColor = ffxLerp(data.fHistoryColor, data.fUpsampledColor, fAlpha);
+ data.fHistoryColor = YCoCgToRGB(data.fHistoryColor);
+
+#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT
+ data.fHistoryColor = InverseTonemap(data.fHistoryColor); // undo the Tonemap() applied before blending
+#endif
+}
+
+void RectifyHistory(
+ const AccumulationPassCommonParams params,
+ FFX_PARAMETER_INOUT AccumulationPassData data
+)
+{ // Pulls data.fHistoryColor back toward data.clippingBox when it lies outside an ellipsoid of scaled box extents around boxCenter.
+ const FfxFloat32 f4kVelocityFactor = ffxSaturate(params.f4KVelocity / 20.0f);
+ const FfxFloat32 fDistanceFactor = ffxSaturate(0.75f - params.fFarthestDepthInMeters / 20.0f);
+ const FfxFloat32 fAccumulationFactor = 1.0f - params.fAccumulation;
+ const FfxFloat32 fReactiveFactor = ffxPow(params.fReactiveMask, 1.0f / 2.0f); // exponent 1/2, i.e. the square root of the reactive mask
+ const FfxFloat32 fShadingChangeFactor = params.fShadingChange;
+ const FfxFloat32 fBoxScaleT = ffxMax(f4kVelocityFactor, ffxMax(fDistanceFactor, ffxMax(fAccumulationFactor, ffxMax(fReactiveFactor, fShadingChangeFactor)))); // largest of the five factors above
+
+ const FfxFloat32 fBoxScale = ffxLerp(3.0f, 1.0f, fBoxScaleT); // presumably a linear blend: scale 3 at T = 0 down to scale 1 at T = 1
+ const FfxFloat32x3 fScaledBoxVec = data.clippingBox.boxVec * FfxFloat32x3(1.7f, 1.0f, 1.0f) * fBoxScale; // the first component gets an extra 1.7x
+
+ const FfxFloat32x3 fClampedScaledBoxVec = ffxMax(fScaledBoxVec, FfxFloat32x3(1.193e-7f, 1.193e-7f, 1.193e-7f)); // keeps the divisor below away from 0
+ const FfxFloat32x3 fTransformedHistoryColor = (data.fHistoryColor - data.clippingBox.boxCenter) / fClampedScaledBoxVec;
+
+ if (length(fTransformedHistoryColor)>1.f) {
+ const FfxFloat32x3 fClampedHistoryColor = normalize(fTransformedHistoryColor);
+ const FfxFloat32x3 fFinalClampedHistoryColor = (fClampedHistoryColor * fScaledBoxVec) + data.clippingBox.boxCenter; // note: scales back with the unclamped extents, not fClampedScaledBoxVec
+
+ // Blend from the clamped color toward the original history color; the weight grows with max(luma instability, lock contribution) and shrinks with low accumulation or disocclusion.
+ const FfxFloat32 fHistoryContribution = ffxMax(params.fLumaInstabilityFactor, data.fLockContributionThisFrame) * params.fAccumulation * (1 - params.fDisocclusion);
+ data.fHistoryColor = ffxLerp(fFinalClampedHistoryColor, data.fHistoryColor, ffxSaturate(fHistoryContribution));
+ }
+}
+
+void UpdateLockStatus(AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data)
+{ // Updates data.fLock for this pixel and derives data.fLockContributionThisFrame from it; fLockMax and fLockThreshold are defined outside this function.
+ data.fLock *= FfxFloat32(params.bIsNewSample == false); // a new sample starts with no lock
+
+ const FfxFloat32 fLifetimeDecreaseFactor = ffxMax(ffxSaturate(params.fShadingChange), ffxMax(params.fReactiveMask, params.fDisocclusion));
+ data.fLock = ffxMax(0.0f, data.fLock - fLifetimeDecreaseFactor * fLockMax);
+
+ // Compute this frame's lock contribution
+ data.fLockContributionThisFrame = ffxSaturate(ffxSaturate(data.fLock - fLockThreshold) * (fLockMax - fLockThreshold));
+
+ const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos) * (1.0f - ffxMax(params.fShadingChange * 0, params.fReactiveMask)); // NOTE(review): fShadingChange is multiplied by 0 here, so only fReactiveMask attenuates new locks - confirm this is intended
+ data.fLock = ffxMax(0.0f, ffxMin(data.fLock + fNewLockIntensity, fLockMax));
+
+ // Prepare for the next frame
+ const FfxFloat32 fLifetimeDecrease = (0.1f / JitterSequenceLength()) * (1.0f - fLifetimeDecreaseFactor);
+ data.fLock = ffxMax(0.0f, data.fLock - fLifetimeDecrease);
+
+ // We expect similar motion for the next frame.
+ // Kill the lock if the estimated location is outside the screen, so locks do not end up clamped to the screen borders.
+ const FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector;
+ data.fLock *= FfxFloat32(IsUvInside(fEstimatedUvNextFrame) == true);
+}
+
+void ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data)
+{ // Sets data.fHistoryWeight to params.fAccumulation, lowered toward 0.15 as params.f4KVelocity * VelocityFactor() grows.
+ FfxFloat32 fBaseAccumulation = params.fAccumulation;
+ // The blend factor saturates once velocity * VelocityFactor() reaches 0.5; the outer ffxMin means this step can only lower the value, never raise it.
+ fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, 0.15f, ffxSaturate(ffxMax(0.0f, (params.f4KVelocity * VelocityFactor()) / 0.5f))));
+
+ data.fHistoryWeight = fBaseAccumulation;
+}
+
+void InitPassData(FfxInt32x2 iPxHrPos, FFX_PARAMETER_INOUT AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data)
+{ // Fills params with the per-pixel inputs for output pixel iPxHrPos and resets data to its starting values.
+ // Init constant params
+ params.iPxHrPos = iPxHrPos;
+ const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / UpscaleSize(); // pixel center expressed as a UV of the upscaled image
+ params.fHrUv = fHrUv;
+ params.fLrUvJittered = fHrUv + Jitter() / RenderSize();
+ params.fLrUv_HwSampler = ClampUv(params.fLrUvJittered, RenderSize(), MaxRenderSize());
+
+ params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv);
+ params.f4KVelocity = Get4KVelocity(params.fMotionVector);
+
+ ComputeReprojectedUVs(params);
+
+ const FfxFloat32x2 fLumaInstabilityUv_HW = ClampUv(fHrUv, RenderSize(), MaxRenderSize());
+ params.fLumaInstabilityFactor = SampleLumaInstability(fLumaInstabilityUv_HW);
+
+ const FfxFloat32x2 fFarthestDepthUv = ClampUv(params.fLrUvJittered, RenderSize() / 2, GetFarthestDepthMip1ResourceDimensions()); // clamped against half the render size
+ params.fFarthestDepthInMeters = SampleFarthestDepthMip1(fFarthestDepthUv);
+ params.bIsNewSample = (params.bIsExistingSample == false || 0 == FrameIndex()); // NOTE(review): bIsExistingSample is not assigned in this function - presumably set by ComputeReprojectedUVs(params) above; confirm
+
+ const FfxFloat32x4 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler);
+ params.fReactiveMask = ffxSaturate(fDilatedReactiveMasks[REACTIVE]);
+ params.fDisocclusion = ffxSaturate(fDilatedReactiveMasks[DISOCCLUSION]);
+ params.fShadingChange = ffxSaturate(fDilatedReactiveMasks[SHADING_CHANGE]);
+ params.fAccumulation = ffxSaturate(fDilatedReactiveMasks[ACCUMULAION]); // "ACCUMULAION" (sic) must match the spelling of the index where it is declared, which is not visible here
+ params.fAccumulation *= FfxFloat32(round(params.fAccumulation * 100.0f) > 1.0f); // zero out accumulation that rounds to 1% or less
+
+ // Init variable data
+ data.fUpsampledColor = FfxFloat32x3(0.0f, 0.0f, 0.0f);
+ data.fHistoryColor = FfxFloat32x3(0.0f, 0.0f, 0.0f);
+ data.fHistoryWeight = 1.0f;
+ data.fUpsampledWeight = 0.0f;
+ data.fLock = 0.0f;
+ data.fLockContributionThisFrame = 0.0f;
+}
+
+void Accumulate(FfxInt32x2 iPxHrPos)
+{ // Runs the accumulation steps for output pixel iPxHrPos and stores the resulting color, lock value and cleared new-lock entry.
+ AccumulationPassCommonParams params;
+ AccumulationPassData data;
+ InitPassData(iPxHrPos, params, data);
+
+ if (params.bIsExistingSample && !params.bIsNewSample) { // InitPassData makes bIsNewSample true whenever bIsExistingSample is false, so this is false for every new sample
+ ReprojectHistoryColor(params, data);
+ }
+
+ UpdateLockStatus(params, data);
+
+ ComputeBaseAccumulationWeight(params, data);
+
+ ComputeUpsampledColorAndWeight(params, data);
+
+ RectifyHistory(params, data);
+
+ Accumulate(params, data); // the two-argument overload: blends the upsampled color into the history color
+
+ data.fHistoryColor /= Exposure();
+
+ data.fHistoryColor = ffxMax(data.fHistoryColor, FfxFloat32x3(0.0f, 0.0f, 0.0f)); // no negative color components are stored
+
+ StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(data.fHistoryColor, data.fLock)); // the lock value travels in the fourth component
+
+ // Output final color when RCAS is disabled
+#if FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING == 0
+ StoreUpscaledOutput(iPxHrPos, data.fHistoryColor);
+#endif
+ StoreNewLocks(iPxHrPos, 0); // reset this pixel's new-lock entry after it was consumed in UpdateLockStatus
+}
diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h
new file mode 100644
index 000000000000..a0014ddc8b02
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h
@@ -0,0 +1,936 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_fsr3upscaler_resources.h"
+
+#if defined(FFX_GPU)
+#include "../ffx_core.h"
+#endif // #if defined(FFX_GPU)
+
+#if defined(FFX_GPU)
+#ifndef FFX_PREFER_WAVE64
+#define FFX_PREFER_WAVE64
+#endif // FFX_PREFER_WAVE64
+
+#if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER)
+ layout (set = 0, binding = FSR3UPSCALER_BIND_CB_FSR3UPSCALER, std140) uniform cbFSR3UPSCALER_t
+ { // Upscaler constants, read through the accessor functions that follow this block; the layout is std140, so member order and padding are significant.
+ FfxInt32x2 iRenderSize;
+ FfxInt32x2 iPreviousFrameRenderSize;
+
+ FfxInt32x2 iUpscaleSize;
+ FfxInt32x2 iPreviousFrameUpscaleSize;
+
+ FfxInt32x2 iMaxRenderSize;
+ FfxInt32x2 iMaxUpscaleSize;
+
+ FfxFloat32x4 fDeviceToViewDepth;
+
+ FfxFloat32x2 fJitter;
+ FfxFloat32x2 fPreviousFrameJitter;
+
+ FfxFloat32x2 fMotionVectorScale;
+ FfxFloat32x2 fDownscaleFactor;
+
+ FfxFloat32x2 fMotionVectorJitterCancellation;
+ FfxFloat32 fTanHalfFOV;
+ FfxFloat32 fJitterSequenceLength;
+
+ FfxFloat32 fDeltaTime;
+ FfxFloat32 fDeltaPreExposure;
+ FfxFloat32 fViewSpaceToMetersFactor;
+ FfxFloat32 fFrameIndex; // stored as a float; FrameIndex() returns it as FfxFloat32
+
+ FfxFloat32 fVelocityFactor;
+ FfxFloat32 fReactivenessScale; // no accessor function; read directly as cbFSR3Upscaler.fReactivenessScale by the reactive-mask helpers
+ FfxFloat32 fShadingChangeScale; // no accessor function; read directly as cbFSR3Upscaler.fShadingChangeScale by the shading-change helpers
+ FfxFloat32 fAccumulationAddedPerFrame;
+ FfxFloat32 fMinDisocclusionAccumulation;
+
+ // GODOT BEGINS
+ // Do not change this to an array, as arrays have different alignment requirements
+ FfxFloat32 fPad1; // NOTE(review): by std140 rules the scalars above end at byte 148, so the three pads fill bytes 148..159 (assuming the Ffx* aliases are the same-sized GLSL types) - confirm against the CPU-side struct
+ FfxFloat32 fPad2;
+ FfxFloat32 fPad3;
+ mat4 mReprojectionMatrix; // begins at byte 160 under the same assumption, which satisfies the 16-byte alignment std140 requires for a mat4
+ // GODOT ENDS
+ } cbFSR3Upscaler;
+
+
+FfxInt32x2 RenderSize()
+{
+ return cbFSR3Upscaler.iRenderSize;
+}
+
+FfxInt32x2 PreviousFrameRenderSize()
+{
+ return cbFSR3Upscaler.iPreviousFrameRenderSize;
+}
+
+FfxInt32x2 MaxRenderSize()
+{
+ return cbFSR3Upscaler.iMaxRenderSize;
+}
+
+FfxInt32x2 UpscaleSize()
+{
+ return cbFSR3Upscaler.iUpscaleSize;
+}
+
+FfxInt32x2 PreviousFrameUpscaleSize()
+{
+ return cbFSR3Upscaler.iPreviousFrameUpscaleSize;
+}
+
+FfxInt32x2 MaxUpscaleSize()
+{
+ return cbFSR3Upscaler.iMaxUpscaleSize;
+}
+
+FfxFloat32x2 Jitter()
+{
+ return cbFSR3Upscaler.fJitter;
+}
+
+FfxFloat32x2 PreviousFrameJitter()
+{
+ return cbFSR3Upscaler.fPreviousFrameJitter;
+}
+
+FfxFloat32x4 DeviceToViewSpaceTransformFactors()
+{
+ return cbFSR3Upscaler.fDeviceToViewDepth;
+}
+
+FfxFloat32x2 MotionVectorScale()
+{
+ return cbFSR3Upscaler.fMotionVectorScale;
+}
+
+FfxFloat32x2 DownscaleFactor()
+{
+ return cbFSR3Upscaler.fDownscaleFactor;
+}
+
+FfxFloat32x2 MotionVectorJitterCancellation()
+{
+ return cbFSR3Upscaler.fMotionVectorJitterCancellation;
+}
+
+FfxFloat32 TanHalfFoV()
+{
+ return cbFSR3Upscaler.fTanHalfFOV;
+}
+
+FfxFloat32 JitterSequenceLength()
+{
+ return cbFSR3Upscaler.fJitterSequenceLength;
+}
+
+FfxFloat32 DeltaTime()
+{
+ return cbFSR3Upscaler.fDeltaTime;
+}
+
+FfxFloat32 DeltaPreExposure()
+{
+ return cbFSR3Upscaler.fDeltaPreExposure;
+}
+
+FfxFloat32 ViewSpaceToMetersFactor()
+{
+ return cbFSR3Upscaler.fViewSpaceToMetersFactor;
+}
+
+FfxFloat32 FrameIndex()
+{ // Returns the frame index as stored in the constant buffer, i.e. as a float rather than an integer.
+ return cbFSR3Upscaler.fFrameIndex;
+}
+
+FfxFloat32 VelocityFactor()
+{
+ return cbFSR3Upscaler.fVelocityFactor;
+}
+
+FfxFloat32 AccumulationAddedPerFrame()
+{
+ return cbFSR3Upscaler.fAccumulationAddedPerFrame;
+}
+
+FfxFloat32 MinDisocclusionAccumulation()
+{
+ return cbFSR3Upscaler.fMinDisocclusionAccumulation;
+}
+
+#endif // #if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER)
+
+
+#if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE)
+layout(set = 0, binding = FSR3UPSCALER_BIND_CB_AUTOREACTIVE, std140) uniform cbGenerateReactive_t
+{ // NOTE(review): the FSR3UPSCALER_BIND_CB_REACTIVE block in this file reuses the names cbGenerateReactive_t / cbGenerateReactive, so presumably no shader defines both binding macros - confirm
+ FfxFloat32 fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels
+ FfxFloat32 fTcScale;
+ FfxFloat32 fReactiveScale;
+ FfxFloat32 fReactiveMax;
+} cbGenerateReactive;
+
+FfxFloat32 TcThreshold()
+{
+ return cbGenerateReactive.fTcThreshold;
+}
+
+FfxFloat32 TcScale()
+{
+ return cbGenerateReactive.fTcScale;
+}
+
+FfxFloat32 ReactiveScale()
+{
+ return cbGenerateReactive.fReactiveScale;
+}
+
+FfxFloat32 ReactiveMax()
+{
+ return cbGenerateReactive.fReactiveMax;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE)
+
+#if defined(FSR3UPSCALER_BIND_CB_RCAS)
+layout(set = 0, binding = FSR3UPSCALER_BIND_CB_RCAS, std140) uniform cbRCAS_t
+{
+ FfxUInt32x4 rcasConfig;
+} cbRCAS;
+
+FfxUInt32x4 RCASConfig()
+{
+ return cbRCAS.rcasConfig;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_RCAS)
+
+
+#if defined(FSR3UPSCALER_BIND_CB_REACTIVE)
+layout(set = 0, binding = FSR3UPSCALER_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t
+{ // NOTE(review): the FSR3UPSCALER_BIND_CB_AUTOREACTIVE block in this file reuses the names cbGenerateReactive_t / cbGenerateReactive, so presumably no shader defines both binding macros - confirm
+ FfxFloat32 gen_reactive_scale;
+ FfxFloat32 gen_reactive_threshold;
+ FfxFloat32 gen_reactive_binaryValue;
+ FfxUInt32 gen_reactive_flags; // the only non-float member of this block
+} cbGenerateReactive;
+
+FfxFloat32 GenReactiveScale()
+{
+ return cbGenerateReactive.gen_reactive_scale;
+}
+
+FfxFloat32 GenReactiveThreshold()
+{
+ return cbGenerateReactive.gen_reactive_threshold;
+}
+
+FfxFloat32 GenReactiveBinaryValue()
+{
+ return cbGenerateReactive.gen_reactive_binaryValue;
+}
+
+FfxUInt32 GenReactiveFlags()
+{
+ return cbGenerateReactive.gen_reactive_flags;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_REACTIVE)
+
+
+#if defined(FSR3UPSCALER_BIND_CB_SPD)
+layout(set = 0, binding = FSR3UPSCALER_BIND_CB_SPD, std140) uniform cbSPD_t
+{
+ FfxUInt32 mips;
+ FfxUInt32 numWorkGroups;
+ FfxUInt32x2 workGroupOffset;
+ FfxUInt32x2 renderSize;
+} cbSPD;
+
+FfxUInt32 MipCount()
+{
+ return cbSPD.mips;
+}
+
+FfxUInt32 NumWorkGroups()
+{
+ return cbSPD.numWorkGroups;
+}
+
+FfxUInt32x2 WorkGroupOffset()
+{
+ return cbSPD.workGroupOffset;
+}
+
+FfxUInt32x2 SPD_RenderSize()
+{
+ return cbSPD.renderSize;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_SPD)
+
+// GODOT BEGINS
+// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them.
+layout (set = 0, binding = 100 /*1000*/) uniform sampler s_PointClamp;
+layout (set = 0, binding = 101 /*1001*/) uniform sampler s_LinearClamp;
+// GODOT ENDS
+
+#if defined(FSR3UPSCALER_BIND_SRV_SPD_MIPS)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_SPD_MIPS) uniform texture2D r_spd_mips;
+
+FfxInt32x2 GetSPDMipDimensions(FfxUInt32 uMipLevel)
+{
+ return textureSize(r_spd_mips, int(uMipLevel)).xy;
+}
+
+FfxFloat32x2 SampleSPDMipLevel(FfxFloat32x2 fUV, FfxUInt32 mipLevel)
+{
+ return textureLod(sampler2D(r_spd_mips, s_LinearClamp), fUV, float(mipLevel)).rg;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth;
+
+FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos)
+{
+ return texelFetch(r_input_depth, iPxPos, 0).r;
+}
+
+FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_input_depth, s_LinearClamp), fUV, 0.0).x;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask;
+
+FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos)
+{ // Unfiltered fetch of the reactive mask's red channel at mip 0, multiplied by cbFSR3Upscaler.fReactivenessScale.
+ // GODOT BEGINS
+#if FFX_FSR3UPSCALER_OPTION_GODOT_REACTIVE_MASK_CLAMP
+ return min(texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r * cbFSR3Upscaler.fReactivenessScale, 0.9f); // Godot option: the scaled value is capped at 0.9
+#else
+ return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r * cbFSR3Upscaler.fReactivenessScale;
+#endif
+ // GODOT ENDS
+}
+
+FfxInt32x2 GetReactiveMaskResourceDimensions()
+{
+ return textureSize(r_reactive_mask, 0).xy;
+}
+
+FfxFloat32 SampleReactiveMask(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_reactive_mask, s_LinearClamp), fUV, 0.0).x * cbFSR3Upscaler.fReactivenessScale;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) uniform texture2D r_transparency_and_composition_mask;
+
+FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
+{ // Takes unsigned pixel coordinates (unlike the neighbouring Load* helpers) and casts them to signed for texelFetch; returns the red channel at mip 0.
+ return texelFetch(r_transparency_and_composition_mask, FfxInt32x2(iPxPos), 0).r;
+}
+
+FfxInt32x2 GetTransparencyAndCompositionMaskResourceDimensions()
+{
+ return textureSize(r_transparency_and_composition_mask, 0).xy;
+}
+
+FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_transparency_and_composition_mask, s_LinearClamp), fUV, 0.0).x;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered;
+
+FfxFloat32x3 LoadInputColor(FfxInt32x2 iPxPos)
+{
+ return texelFetch(r_input_color_jittered, iPxPos, 0).rgb;
+}
+
+FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_input_color_jittered, s_LinearClamp), fUV, 0.0).rgb;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors;
+
+FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos)
+{ // Fetches the .xy motion vector at the given pixel (mip 0) and returns it multiplied by MotionVectorScale(), optionally with jitter cancellation subtracted.
+ FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy;
+
+ // GODOT BEGINS
+#if FFX_FSR3UPSCALER_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
+ bool bInvalidMotionVector = all(lessThanEqual(fSrcMotionVector, vec2(-1.0f, -1.0f))); // a vector is treated as invalid when both components are <= -1
+ if (bInvalidMotionVector)
+ {
+ FfxFloat32 fSrcDepth = LoadInputDepth(iPxDilatedMotionVectorPos); // LoadInputDepth only exists when FSR3UPSCALER_BIND_SRV_INPUT_DEPTH is defined as well
+ FfxFloat32x2 fUv = (iPxDilatedMotionVectorPos + FfxFloat32(0.5)) / RenderSize();
+ fSrcMotionVector = FFX_FSR_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR3Upscaler.mReprojectionMatrix); // NOTE(review): macro prefix is FFX_FSR_, not FFX_FSR3UPSCALER_ like the enabling option - confirm it matches the definition in the including shader
+ }
+#endif
+ // GODOT ENDS
+
+ FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
+
+#if FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS
+ fUvMotionVector -= MotionVectorJitterCancellation();
+#endif
+
+ return fUvMotionVector;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED) uniform texture2D r_internal_upscaled_color;
+
+FfxFloat32x4 LoadHistory(FfxInt32x2 iPxHistory)
+{
+ return texelFetch(r_internal_upscaled_color, iPxHistory, 0);
+}
+
+FfxFloat32x4 SampleHistory(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_internal_upscaled_color, s_LinearClamp), fUV, 0.0);
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history;
+
+void StoreLumaHistory(FfxInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
+{
+ imageStore(rw_luma_history, iPxPos, fLumaHistory);
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_LUMA_HISTORY) uniform texture2D r_luma_history;
+
+FfxFloat32x4 LoadLumaHistory(FfxInt32x2 iPxPos)
+{
+ return texelFetch(r_luma_history, iPxPos, 0);
+}
+
+FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0);
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_RCAS_INPUT)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_RCAS_INPUT) uniform texture2D r_rcas_input;
+
+FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos)
+{
+ return texelFetch(r_rcas_input, iPxPos, 0);
+}
+
+FfxFloat32x3 SampleRCAS_Input(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_rcas_input, s_LinearClamp), fUV, 0.0).rgb;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color;
+
+void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory)
+{
+ imageStore(rw_internal_upscaled_color, iPxHistory, fHistory);
+}
+
+void StoreInternalColorAndWeight(FfxInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
+{
+ imageStore(rw_internal_upscaled_color, iPxPos, fColorAndWeight);
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output;
+
+void StoreUpscaledOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor)
+{ // Writes fColor to the output image with the fourth (alpha) component forced to 1.0.
+ imageStore(rw_upscaled_output, iPxPos, FfxFloat32x4(fColor, 1.0));
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_ACCUMULATION)
+layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_ACCUMULATION) uniform texture2D r_accumulation;
+
+FfxFloat32 SampleAccumulation(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_accumulation, s_LinearClamp), fUV, 0.0).x;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_ACCUMULATION)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_ACCUMULATION, r8) uniform image2D rw_accumulation;
+
+void StoreAccumulation(FfxInt32x2 iPxPos, FfxFloat32 fAccumulation)
+{
+ imageStore(rw_accumulation, iPxPos, vec4(fAccumulation, 0.0, 0.0, 0.0));
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE)
+layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_SHADING_CHANGE) uniform texture2D r_shading_change;
+
+FfxFloat32 LoadShadingChange(FfxInt32x2 iPxPos)
+{
+ return texelFetch(r_shading_change, iPxPos, 0).x * cbFSR3Upscaler.fShadingChangeScale;
+}
+
+FfxFloat32 SampleShadingChange(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_shading_change, s_LinearClamp), fUV, 0.0).x * cbFSR3Upscaler.fShadingChangeScale;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SHADING_CHANGE, r8) uniform image2D rw_shading_change;
+
+void StoreShadingChange(FfxInt32x2 iPxPos, FfxFloat32 fShadingChange)
+{
+ imageStore(rw_shading_change, iPxPos, vec4(fShadingChange, 0.0, 0.0, 0.0));
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH)
+layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH) uniform texture2D r_farthest_depth;
+
+FfxInt32x2 GetFarthestDepthResourceDimensions()
+{
+ return textureSize(r_farthest_depth, 0).xy;
+}
+
+FfxFloat32 LoadFarthestDepth(FfxInt32x2 iPxPos)
+{
+ return texelFetch(r_farthest_depth, iPxPos, 0).x;
+}
+
+FfxFloat32 SampleFarthestDepth(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_farthest_depth, s_LinearClamp), fUV, 0.0).x;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH, r16f) uniform image2D rw_farthest_depth;
+
+void StoreFarthestDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth)
+{
+ imageStore(rw_farthest_depth, iPxPos, vec4(fDepth, 0.0, 0.0, 0.0));
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1)
+layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1) uniform texture2D r_farthest_depth_mip1;
+
+FfxInt32x2 GetFarthestDepthMip1ResourceDimensions()
+{
+ return textureSize(r_farthest_depth_mip1, 0).xy;
+}
+
+FfxFloat32 LoadFarthestDepthMip1(FfxInt32x2 iPxPos)
+{
+ return texelFetch(r_farthest_depth_mip1, iPxPos, 0).x;
+}
+
+FfxFloat32 SampleFarthestDepthMip1(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_farthest_depth_mip1, s_LinearClamp), fUV, 0.0).x;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1, r16f) uniform image2D rw_farthest_depth_mip1;
+
+void StoreFarthestDepthMip1(FfxInt32x2 iPxPos, FfxFloat32 fDepth)
+{
+ imageStore(rw_farthest_depth_mip1, iPxPos, vec4(fDepth, 0.0, 0.0, 0.0));
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA)
+layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_CURRENT_LUMA) uniform texture2D r_current_luma;
+
+FfxFloat32 LoadCurrentLuma(FfxInt32x2 iPxPos)
+{
+ return texelFetch(r_current_luma, iPxPos, 0).r;
+}
+
+FfxFloat32 SampleCurrentLuma(FfxFloat32x2 uv)
+{
+ return textureLod(sampler2D(r_current_luma, s_LinearClamp), uv, 0.0).r;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_CURRENT_LUMA)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_CURRENT_LUMA, r16f) uniform image2D rw_current_luma;
+
+void StoreCurrentLuma(FfxInt32x2 iPxPos, FfxFloat32 fLuma)
+{
+ imageStore(rw_current_luma, iPxPos, vec4(fLuma, 0.0, 0.0, 0.0));
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY)
+layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY) uniform texture2D r_luma_instability;
+
+FfxFloat32 SampleLumaInstability(FfxFloat32x2 uv)
+{
+ return textureLod(sampler2D(r_luma_instability, s_LinearClamp), uv, 0.0).x;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY, r16f) uniform image2D rw_luma_instability;
+
+void StoreLumaInstability(FfxInt32x2 iPxPos, FfxFloat32 fLumaInstability)
+{
+ imageStore(rw_luma_instability, iPxPos, vec4(fLumaInstability, 0.0, 0.0, 0.0));
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA)
+layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA) uniform texture2D r_previous_luma;
+
+FfxFloat32 LoadPreviousLuma(FfxInt32x2 iPxPos)
+{
+ return texelFetch(r_previous_luma, iPxPos, 0).r;
+}
+
+FfxFloat32 SamplePreviousLuma(FfxFloat32x2 uv)
+{
+ return textureLod(sampler2D(r_previous_luma, s_LinearClamp), uv, 0.0).r;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_NEW_LOCKS)
+layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks;
+
+FfxFloat32 LoadNewLocks(FfxInt32x2 iPxPos)
+{
+ return texelFetch(r_new_locks, iPxPos, 0).r;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_NEW_LOCKS)
+layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks;
+
+FfxFloat32 LoadRwNewLocks(FfxInt32x2 iPxPos)
+{
+ return imageLoad(rw_new_locks, iPxPos).r;
+}
+
+void StoreNewLocks(FfxInt32x2 iPxPos, FfxFloat32 newLock)
+{
+ imageStore(rw_new_locks, iPxPos, vec4(newLock, 0, 0, 0));
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) uniform utexture2D r_reconstructed_previous_nearest_depth;
+
+FfxFloat32 LoadReconstructedPrevDepth(FfxInt32x2 iPxPos)
+{ // The texture is an unsigned-integer texture; the fetched red channel is reinterpreted bit-for-bit as a float, not numerically converted.
+ return uintBitsToFloat(texelFetch(r_reconstructed_previous_nearest_depth, iPxPos, 0).r);
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth;
+
+void StoreReconstructedDepth(FfxInt32x2 iPxSample, FfxFloat32 fDepth)
+{ // Atomically merges fDepth into the r32ui image by comparing the float's raw bit pattern as an unsigned integer.
+ FfxUInt32 uDepth = floatBitsToUint(fDepth); // NOTE(review): unsigned bit-pattern order matches float order only for non-negative values - presumably fDepth >= 0; confirm
+
+ #if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+ imageAtomicMax(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); // inverted depth: keep the maximum
+ #else
+ imageAtomicMin(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); // standard depth: keep the minimum
+ #endif
+}
+
+void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue)
+{
+ imageStore(rw_reconstructed_previous_nearest_depth, iPxSample, uvec4(uValue, 0, 0, 0));
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_DILATED_DEPTH, r32f) writeonly uniform image2D rw_dilated_depth;
+
+void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
+{
+ imageStore(rw_dilated_depth, iPxPos, vec4(fDepth, 0.0, 0.0, 0.0));
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors;
+// Writes the two components of fMotionVector to the write-only rg16f image at iPxPos.
+void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
+{
+ imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0, 0.0));
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS)
+
+#if defined(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors;
+// Fetches the .xy of the texel at integer position iPxInput from mip 0 (texelFetch, no filtering).
+FfxFloat32x2 LoadDilatedMotionVector(FfxInt32x2 iPxInput)
+{
+ return texelFetch(r_dilated_motion_vectors, iPxInput, 0).xy;
+}
+// Samples .xy at texture coordinate fUV, LOD 0, through the s_LinearClamp sampler.
+FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_dilated_motion_vectors, s_LinearClamp), fUV, 0.0).xy;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS)
+
+#if defined(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilated_depth;
+// Fetches the .r channel of the texel at integer position iPxInput from mip 0 (texelFetch, no filtering).
+FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput)
+{
+ return texelFetch(r_dilated_depth, iPxInput, 0).r;
+}
+// Samples the .r channel at texture coordinate fUV, LOD 0, through the s_LinearClamp sampler.
+FfxFloat32 SampleDilatedDepth(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_dilated_depth, s_LinearClamp), fUV, 0.0).r;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH)
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure;
+// Returns the .x of texel (0, 0) of r_input_exposure; a stored value of exactly 0.0 is replaced by 1.0.
+FfxFloat32 Exposure()
+{
+ FfxFloat32 exposure = texelFetch(r_input_exposure, FfxInt32x2(0, 0), 0).x;
+
+ if (exposure == 0.0) {
+ exposure = 1.0;
+ }
+
+ return exposure;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE)
+
+// BEGIN: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT
+#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut;
+#endif
+// Samples r_lanczos_lut at (x / 2, 0.5), LOD 0, with s_LinearClamp and returns .x; always defined, returning 0 when the LUT bind is absent.
+FfxFloat32 SampleLanczos2Weight(FfxFloat32 x)
+{
+#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT)
+ return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0, 0.5), 0.0).x;
+#else
+ return 0.f;
+#endif
+}
+// END: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT
+
+#if defined(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks;
+// Samples all four channels at texture coordinate fUV, LOD 0, through the s_LinearClamp sampler.
+FfxFloat32x4 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
+{
+ return textureLod(sampler2D(r_dilated_reactive_masks, s_LinearClamp), fUV, 0.0);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS)
+
+#if defined(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS, rgba8) writeonly uniform image2D rw_dilated_reactive_masks;
+// Writes all four components of fDilatedReactiveMasks to the write-only rgba8 image at iPxPos.
+void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fDilatedReactiveMasks)
+{
+ imageStore(rw_dilated_reactive_masks, iPxPos, fDilatedReactiveMasks);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS)
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY)
+layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only;
+// Fetches the .xyz of the texel at iPxPos from mip 0 of r_input_opaque_only.
+FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
+{
+ return texelFetch(r_input_opaque_only, iPxPos, 0).xyz;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY)
+
+#if defined(FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR)
+layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha;
+// Fetches the .xyz of the texel at iPxPos from mip 0 of r_input_prev_color_pre_alpha.
+FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
+{
+ return texelFetch(r_input_prev_color_pre_alpha, iPxPos, 0).xyz;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR)
+
+#if defined(FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR)
+layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha;
+// Fetches the .xyz of the texel at iPxPos from mip 0 of r_input_prev_color_post_alpha.
+FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
+{
+ return texelFetch(r_input_prev_color_post_alpha, iPxPos, 0).xyz;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR)
+
+#if defined(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE) && \
+ defined(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION)
+// Compiled only when both bind macros are defined; after the Godot patch below only the auto-reactive image remains declared.
+layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive;
+// With StoreAutoReactive commented out, nothing in this section writes to rw_output_autoreactive.
+// GODOT BEGINS
+// The binding is never used, so comment it out to avoid compilation errors.
+// layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition;
+//
+// void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive)
+// {
+// imageStore(rw_output_autoreactive, iPxPos, FfxFloat32x4(FfxFloat32(fReactive.x), 0.0, 0.0, 0.0));
+//
+// imageStore(rw_output_autocomposition, iPxPos, FfxFloat32x4(FfxFloat32(fReactive.y), 0.0, 0.0, 0.0));
+// }
+// GODOT ENDS
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE) && defined(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION)
+
+#if defined(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR)
+layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_pre_alpha;
+// Writes color (with 0.0 as the fourth component) to the r11f_g11f_b10f image at iPxPos.
+void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
+{
+ imageStore(rw_output_prev_color_pre_alpha, iPxPos, FfxFloat32x4(color, 0.0));
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR)
+
+#if defined(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR)
+layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha;
+// Writes color (with 0.0 as the fourth component) to the r11f_g11f_b10f image at iPxPos.
+void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
+{
+ imageStore(rw_output_prev_color_post_alpha, iPxPos, FfxFloat32x4(color, 0.0));
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR)
+
+#if defined(FSR3UPSCALER_BIND_UAV_FRAME_INFO)
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_FRAME_INFO, rgba32f) uniform image2D rw_frame_info;
+// Reads the four-component value stored at texel (0, 0) of the rgba32f image rw_frame_info.
+FfxFloat32x4 LoadFrameInfo()
+{
+ return imageLoad(rw_frame_info, ivec2(0, 0));
+}
+// Writes fInfo to texel (0, 0) of rw_frame_info; no other texel is touched by these accessors.
+void StoreFrameInfo(FfxFloat32x4 fInfo)
+{
+ imageStore(rw_frame_info, ivec2(0, 0), fInfo);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_FRAME_INFO)
+
+#if defined(FSR3UPSCALER_BIND_SRV_FRAME_INFO)
+layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_FRAME_INFO) uniform texture2D r_frame_info;
+// Fetches the four-component value at texel (0, 0), mip 0, of r_frame_info.
+FfxFloat32x4 FrameInfo()
+{
+ return texelFetch(r_frame_info, ivec2(0, 0), 0);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_FRAME_INFO)
+
+#if defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0) && \
+ defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1) && \
+ defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2) && \
+ defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3) && \
+ defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4) && \
+ defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5)
+// Six rg16f storage images, one per mip index 0-5; all six binds must be defined, and only mip 5 is declared coherent.
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0, rg16f) uniform image2D rw_spd_mip0;
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1, rg16f) uniform image2D rw_spd_mip1;
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2, rg16f) uniform image2D rw_spd_mip2;
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3, rg16f) uniform image2D rw_spd_mip3;
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4, rg16f) uniform image2D rw_spd_mip4;
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5, rg16f) coherent uniform image2D rw_spd_mip5;
+// Returns the .xy of the texel at iPxPos from the image selected by index; returns (0, 0) when index is not 0-5.
+FfxFloat32x2 RWLoadPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 index)
+{
+#define LOAD(idx) \
+ if (index == idx) \
+ { \
+ return imageLoad(rw_spd_mip##idx, iPxPos).xy; \
+ }
+ LOAD(0);
+ LOAD(1);
+ LOAD(2);
+ LOAD(3);
+ LOAD(4);
+ LOAD(5);
+// Fallback for an index that matched none of the six images.
+ return FfxFloat32x2(0.0, 0.0);
+
+#undef LOAD
+}
+// Writes outValue (padded with two zeros) at iPxPos into the image selected by index; an index outside 0-5 stores nothing.
+void StorePyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 outValue, FFX_PARAMETER_IN FfxUInt32 index)
+{
+#define STORE(idx) \
+ if (index == idx) \
+ { \
+ imageStore(rw_spd_mip##idx, iPxPos, vec4(outValue, 0.0, 0.0)); \
+ }
+
+ STORE(0);
+ STORE(1);
+ STORE(2);
+ STORE(3);
+ STORE(4);
+ STORE(5);
+
+#undef STORE
+}
+#endif // SPD mip level 0-5 UAV binds
+
+#if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC
+layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic;
+// Atomically adds 1 to the counter at texel (0, 0); spdCounter receives imageAtomicAdd's result, i.e. the value before the add.
+void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter)
+{
+ spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0, 0), 1);
+}
+// Sets the counter at texel (0, 0) back to 0 using a plain imageStore (not an atomic operation).
+void SPD_ResetAtomicCounter()
+{
+ imageStore(rw_spd_global_atomic, ivec2(0, 0), uvec4(0));
+}
+#endif // #if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC
+
+#endif // #if defined(FFX_GPU)
diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h
new file mode 100644
index 000000000000..ab3969ed5f76
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h
@@ -0,0 +1,990 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_fsr3upscaler_resources.h"
+
+#if defined(FFX_GPU)
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic push
+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
+#endif //__hlsl_dx_compiler
+#include "../ffx_core.h"
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic pop
+#endif //__hlsl_dx_compiler
+#endif // #if defined(FFX_GPU)
+
+#if defined(FFX_GPU)
+#ifndef FFX_PREFER_WAVE64
+#define FFX_PREFER_WAVE64 // defined (empty) here unless the includer already defined it
+#endif // FFX_PREFER_WAVE64
+
+#pragma warning(disable: 3205) // conversion from larger type to smaller
+// Register helpers: FFX_FSR3UPSCALER_DECLARE_SRV/UAV/CB(n) expand to register(tn) / register(un) / register(bn).
+#define DECLARE_SRV_REGISTER(regIndex) t##regIndex
+#define DECLARE_UAV_REGISTER(regIndex) u##regIndex
+#define DECLARE_CB_REGISTER(regIndex) b##regIndex
+#define FFX_FSR3UPSCALER_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) // outer level lets a bind-index macro expand before ## pastes it
+#define FFX_FSR3UPSCALER_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex))
+#define FFX_FSR3UPSCALER_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex))
+
+#if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER)
+cbuffer cbFSR3Upscaler : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) // bound at register b<FSR3UPSCALER_BIND_CB_FSR3UPSCALER>
+{
+ FfxInt32x2 iRenderSize;
+ FfxInt32x2 iPreviousFrameRenderSize;
+// NOTE(review): blank lines appear to group members into rows of four 32-bit values (presumably 16-byte CB packing) - confirm against the CPU-side struct.
+ FfxInt32x2 iUpscaleSize;
+ FfxInt32x2 iPreviousFrameUpscaleSize;
+
+ FfxInt32x2 iMaxRenderSize;
+ FfxInt32x2 iMaxUpscaleSize;
+
+ FfxFloat32x4 fDeviceToViewDepth;
+
+ FfxFloat32x2 fJitter;
+ FfxFloat32x2 fPreviousFrameJitter;
+
+ FfxFloat32x2 fMotionVectorScale;
+ FfxFloat32x2 fDownscaleFactor;
+
+ FfxFloat32x2 fMotionVectorJitterCancellation;
+ FfxFloat32 fTanHalfFOV;
+ FfxFloat32 fJitterSequenceLength;
+
+ FfxFloat32 fDeltaTime;
+ FfxFloat32 fDeltaPreExposure;
+ FfxFloat32 fViewSpaceToMetersFactor;
+ FfxFloat32 fFrameIndex;
+// The final group holds five scalars rather than four.
+ FfxFloat32 fVelocityFactor;
+ FfxFloat32 fReactivenessScale; // no getter in this section; read directly by LoadReactiveMask / SampleReactiveMask
+ FfxFloat32 fShadingChangeScale; // no getter in this section; read directly by LoadShadingChange / SampleShadingChange
+ FfxFloat32 fAccumulationAddedPerFrame;
+ FfxFloat32 fMinDisocclusionAccumulation;
+};
+
+#define FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR3Upscaler) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR3Upscaler size.
+// NOTE(review): the getters below do not strictly follow CB member order (e.g. MaxRenderSize() precedes UpscaleSize()).
+/* Define getter functions in the order they are defined in the CB! */
+FfxInt32x2 RenderSize()
+{
+ return iRenderSize;
+}
+
+FfxInt32x2 PreviousFrameRenderSize()
+{
+ return iPreviousFrameRenderSize;
+}
+
+FfxInt32x2 MaxRenderSize()
+{
+ return iMaxRenderSize;
+}
+
+FfxInt32x2 UpscaleSize()
+{
+ return iUpscaleSize;
+}
+
+FfxInt32x2 PreviousFrameUpscaleSize()
+{
+ return iPreviousFrameUpscaleSize;
+}
+
+FfxInt32x2 MaxUpscaleSize()
+{
+ return iMaxUpscaleSize;
+}
+
+FfxFloat32x2 Jitter()
+{
+ return fJitter;
+}
+
+FfxFloat32x2 PreviousFrameJitter()
+{
+ return fPreviousFrameJitter;
+}
+// Name differs from the member: this getter exposes fDeviceToViewDepth.
+FfxFloat32x4 DeviceToViewSpaceTransformFactors()
+{
+ return fDeviceToViewDepth;
+}
+
+FfxFloat32x2 MotionVectorScale()
+{
+ return fMotionVectorScale;
+}
+
+FfxFloat32x2 DownscaleFactor()
+{
+ return fDownscaleFactor;
+}
+
+FfxFloat32x2 MotionVectorJitterCancellation()
+{
+ return fMotionVectorJitterCancellation;
+}
+// Capitalisation differs from the member: TanHalfFoV() returns fTanHalfFOV.
+FfxFloat32 TanHalfFoV()
+{
+ return fTanHalfFOV;
+}
+
+FfxFloat32 JitterSequenceLength()
+{
+ return fJitterSequenceLength;
+}
+
+FfxFloat32 DeltaTime()
+{
+ return fDeltaTime;
+}
+
+FfxFloat32 DeltaPreExposure()
+{
+ return fDeltaPreExposure;
+}
+
+FfxFloat32 ViewSpaceToMetersFactor()
+{
+ return fViewSpaceToMetersFactor;
+}
+// The frame index is stored and returned as a float.
+FfxFloat32 FrameIndex()
+{
+ return fFrameIndex;
+}
+
+FfxFloat32 VelocityFactor()
+{
+ return fVelocityFactor;
+}
+
+FfxFloat32 AccumulationAddedPerFrame()
+{
+ return fAccumulationAddedPerFrame;
+}
+
+FfxFloat32 MinDisocclusionAccumulation()
+{
+ return fMinDisocclusionAccumulation;
+}
+
+#endif // #if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER)
+// Two-level stringification: the outer macro expands its argument before the inner one applies # to it.
+#define FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(p) FFX_FSR3UPSCALER_ROOTSIG_STR(p)
+#define FFX_FSR3UPSCALER_ROOTSIG_STR(p) #p
+#define FFX_FSR3UPSCALER_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "CBV(b0), " \
+ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+ "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+// The root signature above has one UAV table (u0..), one SRV table (t0..), CBV b0, and static samplers s0 (point, clamp) and s1 (linear, clamp).
+#define FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size.
+// The CB2 variant below is identical except that it adds a second constant buffer view, CBV(b1).
+#define FFX_FSR3UPSCALER_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "CBV(b0), " \
+ "CBV(b1), " \
+ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+ "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+#if defined(FFX_FSR3UPSCALER_EMBED_ROOTSIG) // the *_CONTENT macros carry the attribute only when embedding is requested
+#define FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT FFX_FSR3UPSCALER_ROOTSIG
+#define FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR3UPSCALER_CB2_ROOTSIG
+#else
+#define FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT
+#define FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT
+#endif // #if FFX_FSR3UPSCALER_EMBED_ROOTSIG
+
+#if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE)
+cbuffer cbGenerateReactive : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) // NOTE(review): same cbuffer name as the FSR3UPSCALER_BIND_CB_REACTIVE block; presumably the two binds are never defined together - confirm
+{
+ FfxFloat32 fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels
+ FfxFloat32 fTcScale;
+ FfxFloat32 fReactiveScale;
+ FfxFloat32 fReactiveMax;
+};
+// One getter per member of the four-float buffer above; each returns the member unchanged.
+FfxFloat32 TcThreshold()
+{
+ return fTcThreshold;
+}
+
+FfxFloat32 TcScale()
+{
+ return fTcScale;
+}
+
+FfxFloat32 ReactiveScale()
+{
+ return fReactiveScale;
+}
+
+FfxFloat32 ReactiveMax()
+{
+ return fReactiveMax;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE)
+
+#if defined(FSR3UPSCALER_BIND_CB_RCAS)
+cbuffer cbRCAS : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_RCAS)
+{
+ FfxUInt32x4 rcasConfig;
+};
+// Returns the raw rcasConfig vector (four unsigned 32-bit values) unchanged.
+FfxUInt32x4 RCASConfig()
+{
+ return rcasConfig;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_RCAS)
+
+
+#if defined(FSR3UPSCALER_BIND_CB_REACTIVE)
+cbuffer cbGenerateReactive : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_REACTIVE) // NOTE(review): same cbuffer name as the FSR3UPSCALER_BIND_CB_AUTOREACTIVE block; presumably the two binds are never defined together - confirm
+{
+ FfxFloat32 gen_reactive_scale;
+ FfxFloat32 gen_reactive_threshold;
+ FfxFloat32 gen_reactive_binaryValue;
+ FfxUInt32 gen_reactive_flags;
+};
+// One getter per member of the buffer above (three floats and one unsigned flags word); each returns the member unchanged.
+FfxFloat32 GenReactiveScale()
+{
+ return gen_reactive_scale;
+}
+
+FfxFloat32 GenReactiveThreshold()
+{
+ return gen_reactive_threshold;
+}
+
+FfxFloat32 GenReactiveBinaryValue()
+{
+ return gen_reactive_binaryValue;
+}
+
+FfxUInt32 GenReactiveFlags()
+{
+ return gen_reactive_flags;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_REACTIVE)
+
+#if defined(FSR3UPSCALER_BIND_CB_SPD)
+cbuffer cbSPD : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_SPD) {
+// Six 32-bit values in total (1 + 1 + 2 + 2), the same count as FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE.
+ FfxUInt32 mips;
+ FfxUInt32 numWorkGroups;
+ FfxUInt32x2 workGroupOffset;
+ FfxUInt32x2 renderSize;
+};
+// Getters for the cbSPD members; each returns the member unchanged.
+FfxUInt32 MipCount()
+{
+ return mips;
+}
+
+FfxUInt32 NumWorkGroups()
+{
+ return numWorkGroups;
+}
+
+FfxUInt32x2 WorkGroupOffset()
+{
+ return workGroupOffset;
+}
+// Returns cbSPD's renderSize; distinct from RenderSize(), which returns cbFSR3Upscaler's iRenderSize.
+FfxUInt32x2 SPD_RenderSize()
+{
+ return renderSize;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_SPD)
+
+SamplerState s_PointClamp : register(s0); // s0 is the point-filtered, clamp-addressed static sampler in FFX_FSR3UPSCALER_ROOTSIG / FFX_FSR3UPSCALER_CB2_ROOTSIG
+SamplerState s_LinearClamp : register(s1); // s1 is the linear-filtered, clamp-addressed static sampler in the same root signatures
+
+#if defined(FSR3UPSCALER_BIND_SRV_SPD_MIPS)
+Texture2D r_spd_mips : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_SPD_MIPS);
+// Queries r_spd_mips for mip uMipLevel and returns that mip's width and height as a signed int2.
+FfxInt32x2 GetSPDMipDimensions(FfxUInt32 uMipLevel)
+{
+ FfxUInt32 uWidth;
+ FfxUInt32 uHeight;
+ FfxUInt32 uLevels;
+ r_spd_mips.GetDimensions(uMipLevel, uWidth, uHeight, uLevels);
+// uLevels (the mip count reported by GetDimensions) is not used.
+ return FfxInt32x2(uWidth, uHeight);
+}
+// Samples r_spd_mips at fUV with s_LinearClamp, using mipLevel as the explicit LOD.
+FfxFloat32x2 SampleSPDMipLevel(FfxFloat32x2 fUV, FfxUInt32 mipLevel)
+{
+ return r_spd_mips.SampleLevel(s_LinearClamp, fUV, mipLevel);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_SPD_MIPS)
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH)
+Texture2D r_input_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH);
+// Reads r_input_depth at integer texel position iPxPos via the [] operator (no sampler involved).
+FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos)
+{
+ return r_input_depth[iPxPos];
+}
+// Samples the .x channel at fUV, mip 0, through s_LinearClamp.
+FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV)
+{
+ return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH)
+
+#if defined(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK)
+Texture2D r_reactive_mask : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK);
+// Reads the texel at iPxPos and multiplies it by fReactivenessScale (a cbFSR3Upscaler member, so that buffer must be declared too).
+FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos)
+{
+ return r_reactive_mask[iPxPos] * fReactivenessScale;
+}
+// Returns the width and height reported by GetDimensions for r_reactive_mask, as a signed int2.
+FfxInt32x2 GetReactiveMaskResourceDimensions()
+{
+ FfxUInt32 uWidth;
+ FfxUInt32 uHeight;
+ r_reactive_mask.GetDimensions(uWidth, uHeight);
+
+ return FfxInt32x2(uWidth, uHeight);
+}
+// Samples the .x channel at fUV (mip 0, s_LinearClamp) and multiplies it by fReactivenessScale.
+FfxFloat32 SampleReactiveMask(FfxFloat32x2 fUV)
+{
+ return r_reactive_mask.SampleLevel(s_LinearClamp, fUV, 0).x * fReactivenessScale;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK)
+
+#if defined(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
+Texture2D r_transparency_and_composition_mask : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK);
+// Reads the mask at integer texel position iPxPos via the [] operator; unlike the reactive mask, no scale is applied.
+FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
+{
+ return r_transparency_and_composition_mask[iPxPos];
+}
+// Returns the width and height reported by GetDimensions for the mask texture, as a signed int2.
+FfxInt32x2 GetTransparencyAndCompositionMaskResourceDimensions()
+{
+ FfxUInt32 uWidth;
+ FfxUInt32 uHeight;
+ r_transparency_and_composition_mask.GetDimensions(uWidth, uHeight);
+
+ return FfxInt32x2(uWidth, uHeight);
+}
+// Samples the .x channel at fUV, mip 0, through s_LinearClamp.
+FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV)
+{
+ return r_transparency_and_composition_mask.SampleLevel(s_LinearClamp, fUV, 0).x;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR)
+Texture2D r_input_color_jittered : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_COLOR);
+// Reads the .rgb of the texel at integer position iPxPos via the [] operator.
+FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos)
+{
+ return r_input_color_jittered[iPxPos].rgb;
+}
+// Samples .rgb at fUV, mip 0, through s_LinearClamp.
+FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
+{
+ return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS)
+Texture2D r_input_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS);
+// Reads the .xy motion vector at iPxDilatedMotionVectorPos, scales it by MotionVectorScale(), and optionally removes jitter.
+FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos)
+{
+ FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy;
+
+ FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
+// Only when FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS is nonzero: subtract MotionVectorJitterCancellation().
+#if FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS
+ fUvMotionVector -= MotionVectorJitterCancellation();
+#endif
+
+ return fUvMotionVector;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS)
+
+#if defined(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED)
+Texture2D r_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED);
+// Reads the four-component texel at integer position iPxHistory via the [] operator.
+FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory)
+{
+ return r_internal_upscaled_color[iPxHistory];
+}
+// Samples all four channels at fUV, mip 0, through s_LinearClamp.
+FfxFloat32x4 SampleHistory(FfxFloat32x2 fUV)
+{
+ return r_internal_upscaled_color.SampleLevel(s_LinearClamp, fUV, 0);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED)
+
+#if defined(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY)
+RWTexture2D rw_luma_history : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY);
+// Writes the four-component fLumaHistory to rw_luma_history at iPxPos.
+void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
+{
+ rw_luma_history[iPxPos] = fLumaHistory;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY)
+
+#if defined(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY)
+Texture2D r_luma_history : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY);
+// Reads the four-component texel at iPxPos via the [] operator (note: signed int2 index here).
+FfxFloat32x4 LoadLumaHistory(FfxInt32x2 iPxPos)
+{
+ return r_luma_history[iPxPos];
+}
+// Samples all four channels at fUV, mip 0, through s_LinearClamp.
+FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
+{
+ return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY)
+
+#if defined(FSR3UPSCALER_BIND_SRV_RCAS_INPUT)
+Texture2D r_rcas_input : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RCAS_INPUT);
+// Reads the four-component texel at iPxPos via the [] operator.
+FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos)
+{
+ return r_rcas_input[iPxPos];
+}
+// Samples .rgb at fUV, mip 0, through s_LinearClamp (three channels, unlike the four-channel load above).
+FfxFloat32x3 SampleRCAS_Input(FfxFloat32x2 fUV)
+{
+ return r_rcas_input.SampleLevel(s_LinearClamp, fUV, 0).rgb;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_RCAS_INPUT)
+
+#if defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED)
+RWTexture2D rw_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED);
+// Writes fHistory to rw_internal_upscaled_color at iPxHistory.
+void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory)
+{
+ rw_internal_upscaled_color[iPxHistory] = fHistory;
+}
+// Writes fColorAndWeight to the same rw_internal_upscaled_color resource; the body matches StoreReprojectedHistory apart from parameter names.
+void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
+{
+ rw_internal_upscaled_color[iPxPos] = fColorAndWeight;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED)
+
+#if defined(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT)
+RWTexture2D rw_upscaled_output : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT);
+// Writes fColor to rw_upscaled_output at iPxPos with the fourth component fixed at 1.
+void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
+{
+ rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT)
+
+#if defined(FSR3UPSCALER_BIND_SRV_ACCUMULATION)
+Texture2D r_accumulation : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_ACCUMULATION);
+// Samples r_accumulation at fUV, mip 0, through s_LinearClamp and returns the result as a scalar.
+FfxFloat32 SampleAccumulation(FfxFloat32x2 fUV)
+{
+ return r_accumulation.SampleLevel(s_LinearClamp, fUV, 0);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_ACCUMULATION)
+
+#if defined(FSR3UPSCALER_BIND_UAV_ACCUMULATION)
+RWTexture2D rw_accumulation : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_ACCUMULATION);
+// Writes the scalar fAccumulation to rw_accumulation at iPxPos.
+void StoreAccumulation(FfxUInt32x2 iPxPos, FfxFloat32 fAccumulation)
+{
+ rw_accumulation[iPxPos] = fAccumulation;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_ACCUMULATION)
+
+#if defined(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE)
+Texture2D r_shading_change : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE);
+// Reads the texel at iPxPos and multiplies it by fShadingChangeScale (a cbFSR3Upscaler member, so that buffer must be declared too).
+FfxFloat32 LoadShadingChange(FfxUInt32x2 iPxPos)
+{
+ return r_shading_change[iPxPos] * fShadingChangeScale;
+}
+// Samples at fUV (mip 0, s_LinearClamp) and multiplies the result by fShadingChangeScale.
+FfxFloat32 SampleShadingChange(FfxFloat32x2 fUV)
+{
+ return r_shading_change.SampleLevel(s_LinearClamp, fUV, 0) * fShadingChangeScale;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE)
+
+#if defined(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE)
+RWTexture2D rw_shading_change : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE);
+// Writes the scalar fShadingChange to rw_shading_change at iPxPos; no scale is applied on the store side.
+void StoreShadingChange(FfxUInt32x2 iPxPos, FfxFloat32 fShadingChange)
+{
+ rw_shading_change[iPxPos] = fShadingChange;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE)
+
+#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH)
+Texture2D r_farthest_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH);
+// Returns the width and height reported by GetDimensions for r_farthest_depth, as a signed int2.
+FfxInt32x2 GetFarthestDepthResourceDimensions()
+{
+ FfxUInt32 uWidth;
+ FfxUInt32 uHeight;
+ r_farthest_depth.GetDimensions(uWidth, uHeight);
+
+ return FfxInt32x2(uWidth, uHeight);
+}
+// Reads r_farthest_depth at integer texel position iPxPos via the [] operator.
+FfxFloat32 LoadFarthestDepth(FfxUInt32x2 iPxPos)
+{
+ return r_farthest_depth[iPxPos];
+}
+// Samples r_farthest_depth at fUV, mip 0, through s_LinearClamp.
+FfxFloat32 SampleFarthestDepth(FfxFloat32x2 fUV)
+{
+ return r_farthest_depth.SampleLevel(s_LinearClamp, fUV, 0);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH)
+
+#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH)
+RWTexture2D rw_farthest_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH);
+// Writes the scalar fDepth to rw_farthest_depth at iPxPos.
+void StoreFarthestDepth(FfxUInt32x2 iPxPos, FfxFloat32 fDepth)
+{
+ rw_farthest_depth[iPxPos] = fDepth;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH)
+
+#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1)
+Texture2D r_farthest_depth_mip1 : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1);
+// Returns the width and height reported by GetDimensions for r_farthest_depth_mip1, as a signed int2.
+FfxInt32x2 GetFarthestDepthMip1ResourceDimensions()
+{
+ FfxUInt32 uWidth;
+ FfxUInt32 uHeight;
+ r_farthest_depth_mip1.GetDimensions(uWidth, uHeight);
+
+ return FfxInt32x2(uWidth, uHeight);
+}
+// Reads r_farthest_depth_mip1 at integer texel position iPxPos via the [] operator.
+FfxFloat32 LoadFarthestDepthMip1(FfxUInt32x2 iPxPos)
+{
+ return r_farthest_depth_mip1[iPxPos];
+}
+// Samples r_farthest_depth_mip1 at fUV with LOD 0 (it is bound as its own resource) through s_LinearClamp.
+FfxFloat32 SampleFarthestDepthMip1(FfxFloat32x2 fUV)
+{
+ return r_farthest_depth_mip1.SampleLevel(s_LinearClamp, fUV, 0);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1)
+
+#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1)
+RWTexture2D rw_farthest_depth_mip1 : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1);
+// Writes the scalar fDepth to rw_farthest_depth_mip1 at iPxPos.
+void StoreFarthestDepthMip1(FfxUInt32x2 iPxPos, FfxFloat32 fDepth)
+{
+ rw_farthest_depth_mip1[iPxPos] = fDepth;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1)
+
+#if defined(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA)
+Texture2D r_current_luma : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA);
+// Reads r_current_luma at integer texel position iPxPos via the [] operator.
+FfxFloat32 LoadCurrentLuma(FfxUInt32x2 iPxPos)
+{
+ return r_current_luma[iPxPos];
+}
+// Samples r_current_luma at uv, mip 0, through s_LinearClamp.
+FfxFloat32 SampleCurrentLuma(FfxFloat32x2 uv)
+{
+ return r_current_luma.SampleLevel(s_LinearClamp, uv, 0);
+}
+#endif // #if defined(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA)
+
+#if defined(FSR3UPSCALER_BIND_UAV_CURRENT_LUMA)
+RWTexture2D